@kirkelabs/agent-readiness-scan 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AUTHORS +11 -0
- package/CITATION.cff +30 -0
- package/LICENSE +21 -0
- package/README.md +142 -0
- package/bin/cli.js +172 -0
- package/package.json +64 -0
- package/src/checks/01-per-bot-policy.js +124 -0
- package/src/checks/02-declared-use-signals.js +93 -0
- package/src/checks/03-bot-auth-readiness.js +63 -0
- package/src/checks/04-mcp-exposure.js +104 -0
- package/src/checks/05-agentic-commerce.js +85 -0
- package/src/checks/06-product-offer.js +138 -0
- package/src/checks/07-identity-corroboration.js +131 -0
- package/src/checks/08-source-regulatory.js +125 -0
- package/src/fetcher.js +89 -0
- package/src/generators.js +174 -0
- package/src/index.js +126 -0
- package/src/scorecard.js +87 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check 03 — Bot-Auth readiness
|
|
3
|
+
*
|
|
4
|
+
* Web Bot Auth (IETF draft-meunier-web-bot-auth-architecture) lets sites
|
|
5
|
+
* cryptographically verify the identity of an incoming bot via signed
|
|
6
|
+
* requests + a public-key directory at /.well-known/http-message-
|
|
7
|
+
* signatures-directory. As of mid-2026 it is rising — Cloudflare,
|
|
8
|
+
* Google-Agent and AWS WAF are actively implementing.
|
|
9
|
+
*
|
|
10
|
+
* This check is forward-looking. A 0 today does not condemn a site;
|
|
11
|
+
* but a non-zero is a credible "we're treating bots as identified
|
|
12
|
+
* visitors" signal worth surfacing.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
 * Static descriptor for check 03 (Bot-Auth readiness).
 *
 * - `id`     — machine-readable identifier for this check
 * - `title`  — human-readable name
 * - `weight` — 5, the same value `run` reports as `max`
 * - `why`    — rationale text explaining what the check measures
 *
 * NOTE(review): presumably consumed by the scorecard/CLI layer — confirm against the caller.
 */
export const meta = {
  id: 'bot-auth-readiness',
  title: 'Bot-Auth readiness',
  weight: 5,
  why: 'Cryptographic crawler-identity verification (Web Bot Auth, RFC 9421 HTTP Message Signatures) is the durable successor to user-agent strings. A published key directory means the site is ready to distinguish a verified Google-Agent from a random scraper.',
};
|
|
21
|
+
|
|
22
|
+
/**
 * Scores whether the site publishes a usable Web Bot Auth key directory.
 *
 * Scoring (out of 5):
 *   0 — no directory (`wellKnown.botAuthDirectory` missing or `found` falsy)
 *   2 — directory present but unusable: not JSON, not a JWK / JWKS, a key
 *       without a string `kty`, or a JWKS whose `keys` array is empty
 *   5 — a single JWK, or a JWKS with at least one key, every key carrying a
 *       string `kty`
 *
 * @param {object} ctx
 * @param {object} [ctx.wellKnown] - fetched well-known resources; only
 *   `botAuthDirectory` (`{ found, content }`) is read here.
 * @returns {{score: number, max: number, findings: Array<{level: string, msg: string}>, detail: object}}
 */
export function run({ wellKnown }) {
  const findings = [];
  const dir = wellKnown?.botAuthDirectory;

  if (!dir || !dir.found) {
    findings.push({
      level: 'fail',
      msg: 'No `/.well-known/http-message-signatures-directory` found. The site cannot cryptographically distinguish verified AI agents from impostors. (Forward-looking — not yet universal, but increasingly relied on.)',
    });
    return { score: 0, max: 5, findings, detail: { found: false } };
  }

  // Try to parse as JWK or JWKS.
  let valid = false;
  let emptyKeySet = false;
  let keyCount = 0;
  try {
    const parsed = JSON.parse(dir.content);
    if (Array.isArray(parsed?.keys)) {
      keyCount = parsed.keys.length;
      emptyKeySet = keyCount === 0;
      // `[].every()` is vacuously true, so an empty key set has to be
      // rejected explicitly or it would earn full marks with zero keys.
      valid = !emptyKeySet && parsed.keys.every((k) => k && typeof k.kty === 'string');
    } else if (typeof parsed?.kty === 'string') {
      keyCount = 1;
      valid = true;
    }
  } catch {
    // Content is not JSON at all: scored as present-but-invalid below.
    valid = false;
  }

  if (!valid) {
    findings.push({
      level: 'warn',
      msg: emptyKeySet
        ? 'Key directory parses as a JWKS but declares no keys. A bot-auth verifier would have nothing to verify signatures against.'
        : 'Key directory present but does not parse as a valid JWK / JWKS document. A bot-auth verifier would reject it.',
    });
    return { score: 2, max: 5, findings, detail: { found: true, valid: false } };
  }

  findings.push({
    level: 'pass',
    msg: `Web Bot Auth key directory present with ${keyCount} key(s). Bots can be cryptographically verified before content is served.`,
  });
  return { score: 5, max: 5, findings, detail: { found: true, valid: true, keyCount } };
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check 04 — MCP exposure
|
|
3
|
+
*
|
|
4
|
+
* Model Context Protocol (MCP, Anthropic) is the agent-tooling layer
|
|
5
|
+
* for the customs-house era. A brand that exposes an MCP server card
|
|
6
|
+
* at /.well-known/mcp/server-card.json is declaring "agents may act on
|
|
7
|
+
* me with these scoped tools, behind this auth flow."
|
|
8
|
+
*
|
|
9
|
+
* NSA's May-2026 guidance recommends OAuth-protected-resource metadata
|
|
10
|
+
* + PKCE/S256; we check for both as positive signals.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Static descriptor for check 04 (MCP exposure).
 *
 * - `id`     — machine-readable identifier for this check
 * - `title`  — human-readable name
 * - `weight` — 7, the same value `run` reports as `max`
 * - `why`    — rationale text explaining what the check measures
 *
 * NOTE(review): presumably consumed by the scorecard/CLI layer — confirm against the caller.
 */
export const meta = {
  id: 'mcp-exposure',
  title: 'MCP exposure',
  weight: 7,
  why: 'Model Context Protocol is how a brand publishes the tools an AI agent can call on its behalf. Without a public server card, agents have nothing to invoke — the brand is read-only to the AI economy.',
};
|
|
19
|
+
|
|
20
|
+
/**
 * Scores MCP server-card exposure plus the OAuth posture around it.
 *
 * Scoring (out of 7):
 *   0  — no server card
 *   2  — card present but not a JSON object (unparseable, primitive or array)
 *   3  — valid card without a non-empty `tools[]`
 *   4  — valid card with at least one tool
 *   +2 — `oauthProtectedResource` was found
 *   +1 — protected-resource metadata is parseable JSON AND the authorization
 *        server metadata lists `S256` in `code_challenge_methods_supported`
 *
 * @param {object} ctx
 * @param {object} [ctx.wellKnown] - fetched well-known resources; reads
 *   `mcpServerCard`, `oauthProtectedResource` and `oauthAuthorizationServer`
 *   (each `{ found, content }`).
 * @returns {{score: number, max: number, findings: Array<{level: string, msg: string}>, detail: object}}
 */
export function run({ wellKnown }) {
  const findings = [];
  const detail = { card: { found: false, valid: false }, oauth: { found: false, pkce: false } };

  const card = wellKnown?.mcpServerCard;
  if (!card || !card.found) {
    findings.push({
      level: 'fail',
      msg: 'No `/.well-known/mcp/server-card.json` found. The brand exposes no agent-callable tools. AI agents can read but cannot act.',
    });
    return { score: 0, max: 7, findings, detail };
  }

  detail.card.found = true;
  let parsed = null;
  try {
    parsed = JSON.parse(card.content);
  } catch {
    parsed = null;
  }
  // A server card is a JSON object; `typeof [] === 'object'`, so arrays must
  // be excluded explicitly.
  detail.card.valid = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
  if (!detail.card.valid) {
    findings.push({
      level: 'warn',
      msg: 'MCP server card present but does not parse as a JSON object. Agents discovering this surface would skip it.',
    });
    return { score: 2, max: 7, findings, detail };
  }

  let score = 4;
  findings.push({
    level: 'pass',
    msg: `MCP server card present and valid${parsed.name ? ` (\`${parsed.name}\`)` : ''}. Agents can discover the tool surface.`,
  });
  if (Array.isArray(parsed.tools) && parsed.tools.length > 0) {
    findings.push({
      level: 'pass',
      msg: `${parsed.tools.length} tool(s) declared in the server card.`,
    });
  } else {
    findings.push({
      level: 'warn',
      msg: 'Server card declares no `tools[]`. The card exists but has no callable surface for agents.',
    });
    score = 3;
  }

  // OAuth-protected-resource metadata (per NSA May-2026 guidance).
  const oauth = wellKnown?.oauthProtectedResource;
  if (oauth && oauth.found) {
    detail.oauth.found = true;
    score += 2;
    findings.push({
      level: 'pass',
      msg: '`/.well-known/oauth-protected-resource` discoverable — meets NSA May-2026 trust-boundary recommendation.',
    });
    try {
      // Parsed only to confirm the metadata is well-formed JSON; when it is
      // not, the throw skips the PKCE bonus below.
      JSON.parse(oauth.content);
      const auth = wellKnown?.oauthAuthorizationServer;
      if (auth && auth.found) {
        const authMeta = JSON.parse(auth.content);
        const methods = authMeta.code_challenge_methods_supported || [];
        if (Array.isArray(methods) && methods.includes('S256')) {
          detail.oauth.pkce = true;
          score += 1;
          findings.push({
            level: 'pass',
            msg: 'OAuth authorization server declares PKCE (S256) — scoped auth posture as recommended.',
          });
        }
      }
    } catch {
      // Malformed OAuth metadata: the discoverability credit stands, the
      // PKCE bonus is simply not awarded.
    }
  } else {
    findings.push({
      level: 'warn',
      msg: 'No `/.well-known/oauth-protected-resource` metadata. NSA guidance recommends scoped OAuth gating before tool-call forwarding.',
    });
  }

  return { score: Math.min(score, 7), max: 7, findings, detail };
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check 05 — Agentic-commerce manifests
|
|
3
|
+
*
|
|
4
|
+
* The customs declaration for commerce. The two live specs as of mid-2026:
|
|
5
|
+
* - Agentic Commerce Protocol (OpenAI/Stripe): /.well-known/acp/manifest.json
|
|
6
|
+
* - Google Universal Cart Protocol (UCP): /.well-known/ucp
|
|
7
|
+
*
|
|
8
|
+
* Either one is a credible declaration that an agent can transact on
|
|
9
|
+
* the brand's behalf. Both is full coverage.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Static descriptor for check 05 (Agentic-commerce manifests).
 *
 * - `id`     — machine-readable identifier for this check
 * - `title`  — human-readable name
 * - `weight` — 7, the same value `run` reports as `max`
 * - `why`    — rationale text explaining what the check measures
 *
 * NOTE(review): presumably consumed by the scorecard/CLI layer — confirm against the caller.
 */
export const meta = {
  id: 'agentic-commerce',
  title: 'Agentic-commerce manifests',
  weight: 7,
  why: 'ACP (OpenAI/Stripe) and UCP (Google) are the two live 2026 protocols for agent-initiated checkout. Without a manifest at the canonical .well-known path, conversational-commerce surfaces have no place to bind to.',
};
|
|
18
|
+
|
|
19
|
+
/**
 * Inspects one fetched manifest and records a finding for it.
 *
 * @param {{found?: boolean, content?: string}|undefined} resource - fetched resource, if any.
 * @param {string} label - short protocol label used in warning messages (e.g. `ACP`).
 * @param {string} presentMsg - pass-message stem, completed with the optional version and a full stop.
 * @param {Array<{level: string, msg: string}>} findings - findings list, appended to in place.
 * @returns {{found: boolean, valid: boolean}} `valid` is true only when the content parses to a JSON object.
 */
function inspectCommerceManifest(resource, label, presentMsg, findings) {
  const state = { found: false, valid: false };
  if (!resource || !resource.found) return state;
  state.found = true;

  let parsed;
  try {
    parsed = JSON.parse(resource.content);
  } catch {
    findings.push({ level: 'warn', msg: `${label} manifest present but does not parse as JSON.` });
    return state;
  }

  // A manifest is a JSON object; `typeof [] === 'object'`, so arrays are
  // rejected explicitly alongside primitives and null.
  state.valid = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
  if (state.valid) {
    const version = parsed.version ? ` (version \`${parsed.version}\`)` : '';
    findings.push({ level: 'pass', msg: `${presentMsg}${version}.` });
  } else {
    findings.push({ level: 'warn', msg: `${label} manifest present but invalid JSON shape.` });
  }
  return state;
}

/**
 * Scores the presence of agentic-commerce manifests.
 *
 * Scoring (out of 7): 7 when both the ACP and the UCP manifest are valid
 * JSON objects, 5 when exactly one is, 0 otherwise.
 *
 * @param {object} ctx
 * @param {object} [ctx.wellKnown] - fetched well-known resources; reads
 *   `acpManifest` and `ucp` (each `{ found, content }`).
 * @returns {{score: number, max: number, findings: Array<{level: string, msg: string}>, detail: object}}
 */
export function run({ wellKnown }) {
  const findings = [];
  const detail = {
    acp: inspectCommerceManifest(
      wellKnown?.acpManifest,
      'ACP',
      'Agentic Commerce Protocol manifest present at `/.well-known/acp/manifest.json`',
      findings,
    ),
    ucp: inspectCommerceManifest(
      wellKnown?.ucp,
      'UCP',
      'Google Universal Cart manifest present at `/.well-known/ucp`',
      findings,
    ),
  };

  let score;
  if (detail.acp.valid && detail.ucp.valid) {
    score = 7;
    findings.push({
      level: 'pass',
      msg: 'Both ACP and UCP manifests declared. Cross-platform agent commerce is wired.',
    });
  } else if (detail.acp.valid || detail.ucp.valid) {
    score = 5;
    findings.push({
      level: 'warn',
      msg: 'One of the two live agentic-commerce protocols declared. Consider adding the other to widen agent-marketplace coverage.',
    });
  } else {
    score = 0;
    findings.push({
      level: 'fail',
      msg: 'Neither `/.well-known/acp/manifest.json` (OpenAI/Stripe) nor `/.well-known/ucp` (Google) present. The site cannot be transacted on by an AI agent.',
    });
  }

  return { score, max: 7, findings, detail };
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check 06 — Agent-actionable Product/Offer
|
|
3
|
+
*
|
|
4
|
+
* Even with ACP/UCP manifests, an agent needs structured-data per product
|
|
5
|
+
* to compare and transact. The required Offer surface for agentic
|
|
6
|
+
* checkout, drawn from Google Merchant Center spec + the agentic-commerce
|
|
7
|
+
* consensus as of mid-2026:
|
|
8
|
+
*
|
|
9
|
+
* REQUIRED price, priceCurrency, availability
|
|
10
|
+
* RECOMMENDED priceValidUntil (future), shippingDetails,
|
|
11
|
+
* acceptedPaymentMethod, hasMerchantReturnPolicy,
|
|
12
|
+
* aggregateRating
|
|
13
|
+
*
|
|
14
|
+
* If no Product JSON-LD is present at all, this dimension is graded
|
|
15
|
+
* not-applicable (neutral 4/7 — same pattern as the on-chain dimension
|
|
16
|
+
* in ai-legibility-scan).
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
 * Static descriptor for check 06 (Agent-actionable Product/Offer).
 *
 * - `id`     — machine-readable identifier for this check
 * - `title`  — human-readable name
 * - `weight` — 7, the same value `run` reports as `max`
 * - `why`    — rationale text explaining what the check measures
 *
 * NOTE(review): presumably consumed by the scorecard/CLI layer — confirm against the caller.
 */
export const meta = {
  id: 'product-offer',
  title: 'Agent-actionable Product/Offer',
  weight: 7,
  why: 'Conversational commerce reads structured offers, not landing-page prose. An Offer missing shippingDetails or priceValidUntil is invisible to a comparison agent — even if the brand is willing to sell.',
};
|
|
25
|
+
|
|
26
|
+
/**
 * Gathers every JSON-LD node embedded in the page.
 *
 * Each `<script type="application/ld+json">` is parsed on its own; a
 * top-level array contributes its members individually, anything else is
 * contributed as a single node. Empty or unparseable scripts are skipped.
 *
 * @param {Function} $ - cheerio-style selector function for the page.
 * @returns {Array<*>} parsed JSON-LD nodes in document order.
 */
function collectJsonLd($) {
  const nodes = [];
  $('script[type="application/ld+json"]').each((_, scriptEl) => {
    const raw = $(scriptEl).contents().first().text() || '';
    const source = raw.trim();
    if (source === '') return;

    let payload;
    try {
      payload = JSON.parse(source);
    } catch {
      // Malformed JSON-LD: ignore this script and carry on with the rest.
      return;
    }

    const entries = Array.isArray(payload) ? payload : [payload];
    for (const entry of entries) nodes.push(entry);
  });
  return nodes;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Finds every schema.org `Product` node reachable from the given JSON-LD nodes.
 *
 * A node counts as a product when its `@type` is `'Product'` or an array
 * containing `'Product'`. The walk descends through `@graph` arrays,
 * `itemListElement` arrays, and the `item` property of list entries: in a
 * schema.org ItemList the product normally sits under `ListItem.item`
 * rather than directly in `itemListElement`.
 *
 * @param {Array<*>} nodes - parsed JSON-LD nodes.
 * @returns {Array<object>} product nodes in depth-first document order.
 */
function findProducts(nodes) {
  const products = [];
  const walk = (n) => {
    if (!n || typeof n !== 'object') return;
    const type = n['@type'];
    if (type === 'Product' || (Array.isArray(type) && type.includes('Product'))) {
      products.push(n);
    }
    if (Array.isArray(n['@graph'])) n['@graph'].forEach((child) => walk(child));
    if (Array.isArray(n.itemListElement)) n.itemListElement.forEach((child) => walk(child));
    // ListItem wraps its payload in `item`; accept a single node or an array.
    if (Array.isArray(n.item)) {
      n.item.forEach((child) => walk(child));
    } else {
      walk(n.item);
    }
  };
  nodes.forEach((node) => walk(node));
  return products;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Picks the offer to grade for a product.
 *
 * When `offers` is an array only its first entry is considered.
 *
 * @param {object} product - a Product JSON-LD node.
 * @returns {object|null} the offer object, or null when there is none or it is not an object.
 */
function extractOffer(product) {
  const { offers } = product;
  const candidate = Array.isArray(offers) ? offers[0] : offers;
  if (candidate && typeof candidate === 'object') return candidate;
  return null;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Scores how agent-actionable the page's Product/Offer structured data is.
 *
 * Field presence is aggregated across all detected products: a field counts
 * as present when any product's offer declares it.
 *
 * Scoring (out of 7):
 *   - no Product JSON-LD at all: neutral 4, with an `info` finding
 *   - otherwise one point per required field (price, priceCurrency,
 *     availability) plus one point per recommended field, capped at 4
 *
 * @param {object} ctx
 * @param {Function} ctx.$ - cheerio-style selector function for the page.
 * @returns {{score: number, max: number, findings: Array<{level: string, msg: string}>, detail: object}}
 */
export function run({ $ }) {
  const findings = [];
  const detail = { productCount: 0, offerFields: {} };

  const nodes = collectJsonLd($);
  const products = findProducts(nodes);
  detail.productCount = products.length;

  if (products.length === 0) {
    findings.push({
      level: 'info',
      msg: 'No Product schema detected — this dimension is not applicable to non-commerce sites. Scored neutral.',
    });
    return { score: 4, max: 7, findings, detail };
  }

  // Aggregate fields across all detected offers.
  const required = ['price', 'priceCurrency', 'availability'];
  const recommended = [
    'priceValidUntil',
    'shippingDetails',
    'acceptedPaymentMethod',
    'hasMerchantReturnPolicy',
    'aggregateRating',
  ];
  const presence = {};
  for (const f of [...required, ...recommended]) presence[f] = false;

  // A numeric 0 is a declared value (a free product has `price: 0`); a bare
  // truthiness test would report it as missing.
  const isDeclared = (v) => v === 0 || Boolean(v);
  const now = new Date();

  for (const p of products) {
    const offer = extractOffer(p);
    if (!offer) continue;
    for (const f of required) if (isDeclared(offer[f])) presence[f] = true;
    // Only a parseable date still in the future counts. The comparison is
    // false for an unparseable date, and the result is coerced so the
    // reported flag is always a boolean.
    const priceStillValid = Boolean(offer.priceValidUntil) && new Date(offer.priceValidUntil) > now;
    presence.priceValidUntil = presence.priceValidUntil || priceStillValid;
    presence.shippingDetails = presence.shippingDetails || !!offer.shippingDetails;
    presence.acceptedPaymentMethod = presence.acceptedPaymentMethod || !!offer.acceptedPaymentMethod;
    // Return policy and rating are accepted on either the offer or the product.
    presence.hasMerchantReturnPolicy =
      presence.hasMerchantReturnPolicy || !!offer.hasMerchantReturnPolicy || !!p.hasMerchantReturnPolicy;
    presence.aggregateRating = presence.aggregateRating || !!p.aggregateRating || !!offer.aggregateRating;
  }
  detail.offerFields = presence;

  const reqCount = required.filter((f) => presence[f]).length;
  const recCount = recommended.filter((f) => presence[f]).length;

  // Required: 0-3 → 0-3 points. Recommended: 0-5 → 0-4 points (capped). Max 7.
  const score = reqCount + Math.min(recCount, 4);

  if (reqCount < 3) {
    findings.push({
      level: 'fail',
      msg: `Offer is missing required field(s): ${required.filter((f) => !presence[f]).join(', ')}. Agents will skip this product in comparison flows.`,
    });
  } else {
    findings.push({
      level: 'pass',
      msg: 'All required Offer fields present (price, priceCurrency, availability).',
    });
  }

  const missingRec = recommended.filter((f) => !presence[f]);
  if (missingRec.length > 0) {
    findings.push({
      level: missingRec.length >= 3 ? 'warn' : 'info',
      msg: `Recommended Offer fields missing: ${missingRec.join(', ')}. Each one closes a comparison-step the agent would otherwise drop you from.`,
    });
  }

  if (!presence.priceValidUntil) {
    findings.push({
      level: 'warn',
      msg: 'No future `priceValidUntil` on the Offer. Conversational-commerce flows treat absent or expired price-validity as a stale-data signal.',
    });
  }

  return { score: Math.min(score, 7), max: 7, findings, detail };
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check 07 — Brand identity corroboration
|
|
3
|
+
*
|
|
4
|
+
* AI engines resolve a brand to an entity by following its sameAs graph
|
|
5
|
+
* out to registry-grade sources (Wikidata, Crunchbase, OpenCorporates,
|
|
6
|
+
* Companies House, SEC EDGAR, GLEIF LEI) — sources that are themselves
|
|
7
|
+
* widely indexed and trusted.
|
|
8
|
+
*
|
|
9
|
+
* Social profiles (LinkedIn, GitHub) are corroborating signals, but
|
|
10
|
+
* registry-grade is what turns "a website" into "an entity an AI engine
|
|
11
|
+
* will confidently cite."
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
 * Static descriptor for check 07 (Brand identity corroboration).
 *
 * - `id`     — machine-readable identifier for this check
 * - `title`  — human-readable name
 * - `weight` — 8, the same value `run` reports as `max`
 * - `why`    — rationale text explaining what the check measures
 *
 * NOTE(review): presumably consumed by the scorecard/CLI layer — confirm against the caller.
 */
export const meta = {
  id: 'identity-corroboration',
  title: 'Brand identity corroboration',
  weight: 8,
  why: 'A sameAs link to Wikidata or Companies House is a verifiable, registry-grade entity claim. A site with three registry-grade sameAs links has a closed identity loop; one without is a brand the AI must guess at.',
};
|
|
20
|
+
|
|
21
|
+
/**
 * Registry-grade identity sources recognised in `sameAs` URLs.
 *
 * Every pattern is anchored at the start of the host (`^` for scheme-less
 * values, or immediately after `//`), so look-alike hosts such as
 * `notsec.gov` or `fakeorcid.org` do not earn registry credit. GLEIF and
 * ORCID additionally accept arbitrary subdomains (e.g. `search.gleif.org`).
 * SEC EDGAR matches the `data.sec.gov` host as well as EDGAR paths on
 * `sec.gov`.
 *
 * None of the patterns use the `g`/`y` flags, so `.test()` is stateless.
 */
const REGISTRY_PATTERNS = [
  { name: 'Wikidata', re: /(^|\/\/)(www\.)?wikidata\.org\/(wiki|entity)\/Q\d+/i },
  { name: 'Crunchbase', re: /(^|\/\/)(www\.)?crunchbase\.com\/organization\//i },
  { name: 'OpenCorporates', re: /(^|\/\/)(www\.)?opencorporates\.com\/companies\//i },
  { name: 'Companies House (UK)', re: /(^|\/\/)find-and-update\.company-information\.service\.gov\.uk\/company\//i },
  { name: 'SEC EDGAR', re: /(^|\/\/)((www\.)?sec\.gov\/(cgi-bin\/browse|edgar|Archives\/edgar)|data\.sec\.gov\/)/i },
  { name: 'GLEIF / LEI', re: /(^|\/\/)([\w-]+\.)*(gleif\.org|lei\.info)\//i },
  { name: 'ORCID', re: /(^|\/\/)([\w-]+\.)*orcid\.org\/[0-9X-]+/i },
];
|
|
30
|
+
/**
 * Social-profile sources recognised in `sameAs` URLs. These corroborate an
 * identity but are not registry-grade.
 *
 * Both patterns are anchored at the start of the host (`^` or right after
 * `//`) and allow only an optional `www.` prefix, so look-alike hosts such
 * as `notgithub.com` do not match. GitHub matches any URL with at least one
 * path segment (user/org profile or repository).
 */
const SOCIAL_PATTERNS = [
  { name: 'LinkedIn', re: /(^|\/\/)(www\.)?linkedin\.com\/(company|in)\//i },
  { name: 'GitHub', re: /(^|\/\/)(www\.)?github\.com\/[^/]+/i },
];
|
|
34
|
+
|
|
35
|
+
/**
 * Collects every distinct `sameAs` URL declared in the page's JSON-LD.
 *
 * Each `<script type="application/ld+json">` is parsed independently
 * (unparseable scripts are skipped) and traversed breadth-first through
 * `@graph` plus the `founder`, `employee`, `contributor` and `publisher`
 * relations. A relation may hold a single node or an array of nodes; array
 * members are queued individually so that, for example, every founder's
 * `sameAs` is seen.
 *
 * @param {Function} $ - cheerio-style selector function for the page.
 * @returns {string[]} distinct sameAs strings in first-seen order.
 */
function collectSameAs($) {
  const found = new Set();
  const relations = ['founder', 'employee', 'contributor', 'publisher'];

  $('script[type="application/ld+json"]').each((_, el) => {
    const text = ($(el).contents().first().text() || '').trim();
    if (!text) return;
    let nodes;
    try {
      nodes = JSON.parse(text);
    } catch {
      // Malformed JSON-LD: skip this script only.
      return;
    }

    const queue = Array.isArray(nodes) ? [...nodes] : [nodes];
    // Index-based traversal: the queue grows while it is being read.
    for (let i = 0; i < queue.length; i += 1) {
      const n = queue[i];
      if (!n || typeof n !== 'object') continue;

      if (Array.isArray(n.sameAs)) {
        for (const u of n.sameAs) if (typeof u === 'string') found.add(u);
      } else if (typeof n.sameAs === 'string') {
        found.add(n.sameAs);
      }

      if (Array.isArray(n['@graph'])) queue.push(...n['@graph']);
      for (const k of relations) {
        const related = n[k];
        if (Array.isArray(related)) {
          queue.push(...related);
        } else if (related) {
          queue.push(related);
        }
      }
    }
  });
  return [...found];
}
|
|
63
|
+
|
|
64
|
+
/**
 * Scores how well the page's JSON-LD `sameAs` graph corroborates the brand's
 * identity.
 *
 * Scoring (out of 8), by number of distinct recognised sources:
 *   0 — no registry and no social source
 *   2 — social only, one source; 3 — social only, two or more
 *   5 — one registry source; 6 — two; 8 — three or more
 * Social sources do not add points once a registry source is present; they
 * only add a `pass` finding. A missing Wikidata reference always adds a
 * `warn` finding.
 *
 * @param {object} ctx
 * @param {Function} ctx.$ - cheerio-style selector function for the page.
 * @returns {{score: number, max: number, findings: Array<{level: string, msg: string}>, detail: object}}
 */
export function run({ $ }) {
  const findings = [];
  const detail = { sameAsUrls: [], registry: [], social: [] };

  const urls = collectSameAs($);
  detail.sameAsUrls = urls;

  for (const u of urls) {
    for (const p of REGISTRY_PATTERNS) {
      if (p.re.test(u) && !detail.registry.includes(p.name)) detail.registry.push(p.name);
    }
    for (const p of SOCIAL_PATTERNS) {
      if (p.re.test(u) && !detail.social.includes(p.name)) detail.social.push(p.name);
    }
  }

  const reg = detail.registry.length;
  const soc = detail.social.length;

  let score;
  if (reg === 0 && soc === 0) {
    score = 0;
    // sameAs links may exist without matching any recognised source; say so
    // instead of claiming there are none.
    findings.push({
      level: 'fail',
      msg:
        urls.length === 0
          ? 'No sameAs links found anywhere in JSON-LD. Nothing corroborates the brand\'s identity to an AI engine — it has to guess who you are.'
          : `${urls.length} sameAs link(s) found in JSON-LD, but none point to a recognised registry-grade or social source. Nothing corroborates the brand's identity to an AI engine — it has to guess who you are.`,
    });
  } else if (reg === 0 && soc >= 1) {
    score = 2 + Math.min(soc - 1, 1);
    findings.push({
      level: 'warn',
      msg: `Social profile(s) declared (${detail.social.join(', ')}) but no registry-grade source. Add Wikidata, Crunchbase, OpenCorporates, Companies House, SEC EDGAR or GLEIF LEI to lift the identity claim.`,
    });
  } else if (reg === 1) {
    score = 5;
    findings.push({
      level: 'warn',
      msg: `1 registry-grade source (${detail.registry[0]}). Aim for 3 — a single source is a corroboration of one; three is a graph.`,
    });
  } else if (reg === 2) {
    score = 6;
    findings.push({
      level: 'warn',
      msg: `2 registry-grade sources (${detail.registry.join(', ')}). One more closes the loop.`,
    });
  } else {
    score = 8;
    findings.push({
      level: 'pass',
      msg: `${reg} registry-grade sources (${detail.registry.join(', ')}). Identity graph is closed.`,
    });
  }

  if (soc > 0 && reg > 0) {
    findings.push({
      level: 'pass',
      msg: `Social corroboration also present: ${detail.social.join(', ')}.`,
    });
  }

  if (!detail.registry.includes('Wikidata')) {
    findings.push({
      level: 'warn',
      msg: 'No Wikidata reference. Wikidata is the upstream graph feeding Google Knowledge Panels and major-model entity resolution — creating an item lifts every other sameAs claim.',
    });
  }

  return { score, max: 8, findings, detail };
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check 08 — Source provenance & regulatory transparency
|
|
3
|
+
*
|
|
4
|
+
* The "source operations" bucket from the strategic paper, plus the
|
|
5
|
+
* baseline regulatory artefacts a 2026 AI agent will look for before
|
|
6
|
+
* treating a site as authoritative:
|
|
7
|
+
*
|
|
8
|
+
 *   - dateModified / dateCreated / datePublished on the page (freshness signal)
|
|
9
|
+
* - /.well-known/security.txt (RFC 9116)
|
|
10
|
+
* - Terms of service link
|
|
11
|
+
* - Contact path (mailto: or /contact)
|
|
12
|
+
* - Privacy policy link
|
|
13
|
+
*
|
|
14
|
+
* One point per artefact, max 5.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
 * Static descriptor for check 08 (Source provenance & regulatory transparency).
 *
 * - `id`     — machine-readable identifier for this check
 * - `title`  — human-readable name
 * - `weight` — 5, the same value `run` reports as `max`
 * - `why`    — rationale text explaining what the check measures
 *
 * NOTE(review): presumably consumed by the scorecard/CLI layer — confirm against the caller.
 */
export const meta = {
  id: 'source-regulatory',
  title: 'Source provenance & regulatory transparency',
  weight: 5,
  why: 'Freshness, security disclosure, terms, contact, and privacy are the customs-form data on every legitimate source. Absent these, an AI engine treats the page as anonymous prose.',
};
|
|
23
|
+
|
|
24
|
+
/**
 * Reports whether the page exposes any publication or modification date.
 *
 * JSON-LD is consulted first: a node (top level or inside `@graph`) with a
 * truthy `dateModified`, `dateCreated` or `datePublished` is enough. If none
 * is found, the markup is checked for a `<time datetime>` element or an
 * `article:modified_time` / `article:published_time` meta tag.
 *
 * @param {Function} $ - cheerio-style selector function for the page.
 * @returns {boolean} true when at least one date signal exists.
 */
function hasDateSignal($) {
  const dateKeys = ['dateModified', 'dateCreated', 'datePublished'];
  let seenInJsonLd = false;

  $('script[type="application/ld+json"]').each((_, scriptEl) => {
    const source = ($(scriptEl).contents().first().text() || '').trim();
    // Nothing to do for empty scripts, or once an earlier script matched.
    if (source === '' || seenInJsonLd) return;
    try {
      const payload = JSON.parse(source);
      const pending = Array.isArray(payload) ? [...payload] : [payload];
      while (pending.length > 0) {
        const node = pending.shift();
        if (node && typeof node === 'object') {
          if (dateKeys.some((key) => node[key])) {
            seenInJsonLd = true;
            break;
          }
          if (Array.isArray(node['@graph'])) pending.push(...node['@graph']);
        }
      }
    } catch {
      // Unparseable JSON-LD contributes no signal.
    }
  });

  if (seenInJsonLd) return true;

  const markupSelectors = [
    'time[datetime]',
    'meta[property="article:modified_time"]',
    'meta[property="article:published_time"]',
  ];
  return markupSelectors.some((selector) => $(selector).length > 0);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Finds the first anchor whose href or visible text contains any of the
 * given needles.
 *
 * Both href and text are lower-cased before comparison, so needles are
 * expected in lower case. The href is returned exactly as written in the
 * markup.
 *
 * @param {Function} $ - cheerio-style selector function for the page.
 * @param {...string} needles - lower-case substrings to look for.
 * @returns {string|null} href of the first matching anchor, or null when none matches.
 */
function findLink($, ...needles) {
  let match = null;
  $('a[href]').each((_, anchor) => {
    if (match) return;
    const link = $(anchor);
    const rawHref = link.attr('href');
    const haystacks = [(rawHref || '').toLowerCase(), (link.text() || '').toLowerCase()];
    const matched = needles.some((needle) => haystacks.some((hay) => hay.includes(needle)));
    if (matched) match = rawHref;
  });
  return match;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Scores the page's provenance and regulatory-transparency artefacts.
 *
 * One point each (max 5) for: a date signal, `/.well-known/security.txt`,
 * a terms-of-service link, a contact path, and a privacy-policy link.
 * Findings are emitted in that order, one per artefact.
 *
 * @param {object} ctx
 * @param {Function} ctx.$ - cheerio-style selector function for the page.
 * @param {object} [ctx.wellKnown] - fetched well-known resources; only `securityTxt.found` is read.
 * @returns {{score: number, max: number, findings: Array<{level: string, msg: string}>, detail: object}}
 */
export function run({ $, wellKnown }) {
  const findings = [];
  const detail = {};
  let score = 0;

  // Record the outcome of one artefact: a point plus a pass finding when it
  // is present, otherwise a finding at the given level and no point.
  const record = (present, passMsg, missLevel, missMsg) => {
    if (present) {
      score += 1;
      findings.push({ level: 'pass', msg: passMsg });
    } else {
      findings.push({ level: missLevel, msg: missMsg });
    }
  };

  // 1. Date signal
  detail.dateSignal = hasDateSignal($);
  record(
    detail.dateSignal,
    'Page declares a `dateModified` / `datePublished` (in JSON-LD or `<time>`). Freshness is legible.',
    'warn',
    'No dateModified / datePublished signal. AI engines treat undated pages as low-confidence.',
  );

  // 2. security.txt
  detail.securityTxt = Boolean(wellKnown?.securityTxt?.found);
  record(
    detail.securityTxt,
    '`/.well-known/security.txt` present (RFC 9116).',
    'info',
    'No `/.well-known/security.txt`. A 4-line file with a contact email and expiry date is a trust signal an AI customs officer notices.',
  );

  // 3. T&Cs link
  detail.termsLink = findLink($, '/terms', '/tos', 'terms of', 'terms-and-conditions', 'terms_and_conditions');
  record(
    detail.termsLink,
    `Terms of service link found (\`${detail.termsLink}\`).`,
    'warn',
    'No visible Terms of Service link. DSA Art. 14 expects publicly-accessible T&Cs.',
  );

  // 4. Contact path
  detail.contactPath = findLink($, '/contact', 'mailto:', 'contact us');
  record(
    detail.contactPath,
    `Contact path found (\`${detail.contactPath}\`).`,
    'warn',
    'No visible contact path. Agents handling disputes or escalations have nowhere to route to.',
  );

  // 5. Privacy policy
  detail.privacyLink = findLink($, '/privacy', 'privacy-policy', 'privacy policy');
  record(
    detail.privacyLink,
    `Privacy policy link found (\`${detail.privacyLink}\`).`,
    'warn',
    'No visible privacy policy link. Regulators (and increasingly, agents) will treat this as missing customs paperwork.',
  );

  return { score, max: 5, findings, detail };
}
|