npm - @kirkelabs/agent-readiness-scan - Versions diffs - 0.1.0 - Mend

@kirkelabs/agent-readiness-scan 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/AUTHORS +11 -0
package/CITATION.cff +30 -0
package/LICENSE +21 -0
package/README.md +142 -0
package/bin/cli.js +172 -0
package/package.json +64 -0
package/src/checks/01-per-bot-policy.js +124 -0
package/src/checks/02-declared-use-signals.js +93 -0
package/src/checks/03-bot-auth-readiness.js +63 -0
package/src/checks/04-mcp-exposure.js +104 -0
package/src/checks/05-agentic-commerce.js +85 -0
package/src/checks/06-product-offer.js +138 -0
package/src/checks/07-identity-corroboration.js +131 -0
package/src/checks/08-source-regulatory.js +125 -0
package/src/fetcher.js +89 -0
package/src/generators.js +174 -0
package/src/index.js +126 -0
package/src/scorecard.js +87 -0

package/AUTHORS ADDED Viewed

@@ -0,0 +1,11 @@
+agent-readiness-scan authors
+============================
+Lead author:
+  Soleman El Gelawi <soleman@kirkelabs.com> — CTO, Kirke Labs
+Co-author / publisher:
+  Steve Kirton <steve@kirkelabs.com> — Founder & CEO, Kirke Labs
+Organization:
+  Kirke Labs — https://www.kirkelabs.com

package/CITATION.cff ADDED Viewed

@@ -0,0 +1,30 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+title: "agent-readiness-scan"
+abstract: "Open-source customs-house auditor for AI agents: scores 8 weighted dimensions covering crawler policy, agent-action surfaces (MCP/ACP), Product/Offer completeness, and brand identity corroboration. Generates a drop-in customs declaration (robots.txt + .well-known/ manifests)."
+type: software
+license: MIT
+repository-code: "https://github.com/KirkeLabs/agent-readiness-scan"
+url: "https://kirkelabs.github.io/agent-readiness-scan/"
+version: 0.1.0
+date-released: 2026-06-01
+authors:
+  - given-names: Soleman
+    family-names: El Gelawi
+    email: soleman@kirkelabs.com
+    affiliation: "Kirke Labs"
+    # sameAs: https://www.linkedin.com/in/soleman-gelawi/ , https://github.com/sgelawi
+  - given-names: Steve
+    family-names: Kirton
+    email: steve@kirkelabs.com
+    affiliation: "Kirke Labs"
+    # sameAs: https://www.linkedin.com/in/stevekirton-kirkelabs/
+keywords:
+  - ai-agents
+  - mcp
+  - acp
+  - agentic-commerce
+  - crawler-policy
+  - web-bot-auth
+  - schema-org
+  - algorand

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Kirke Labs — Soleman El Gelawi and Steve Kirton
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,142 @@
+# agent-readiness-scan
+[![npm version](https://img.shields.io/npm/v/@kirkelabs/agent-readiness-scan?color=00dc94&style=flat)](https://www.npmjs.com/package/@kirkelabs/agent-readiness-scan)
+[![License: MIT](https://img.shields.io/badge/license-MIT-00dc94?style=flat)](./LICENSE)
+[![Node](https://img.shields.io/badge/node-%3E%3D20-00dc94?style=flat)](https://nodejs.org)
+[![CI](https://img.shields.io/badge/CI-passing-00dc94?style=flat)](./.github/workflows/ci.yml)
+**Is your brand ready for AI agents to act on it?** Audit your customs-house posture — crawler policy, MCP/ACP exposure, agent-actionable Product/Offer, brand identity corroboration — and get the drop-in files to fix it.
+```bash
+npx @kirkelabs/agent-readiness-scan https://your-site.com
+```
+No install. No account. No data leaves your machine.
+> Built by Soleman El Gelawi (CTO, [Kirke Labs](https://www.kirkelabs.com)), with Steve Kirton — open-sourced as a gift to the Algorand ecosystem. MIT licensed. Use it, fork it, ship it.
+---
+## What is this?
+The open web is becoming a customs house. AI search, agentic commerce (ACP, Universal Cart), bot authentication (Web Bot Auth), crawler policy (Cloudflare Content Signals), and the EU DSA / DMA all push in the same direction: every web property now needs a *declared access posture*, not just a content strategy.
+`agent-readiness-scan` audits that posture. It fetches a URL plus seven `.well-known/*` paths plus `robots.txt`, and scores 8 dimensions covering:
+- **Crawler policy** — does your `robots.txt` name the major AI bots individually, with declared use-policy signals?
+- **Bot authentication** — is a Web Bot Auth key directory present?
+- **Agent action surfaces** — MCP server card, Agentic Commerce Protocol manifest, Google Universal Cart manifest?
+- **Commerce structured data** — are your Product/Offer JSON-LD blocks complete enough for agent-driven checkout?
+- **Identity corroboration** — does the `sameAs` graph reach registry-grade sources (Wikidata, Crunchbase, Companies House, SEC EDGAR, GLEIF)?
+- **Source operations & regulatory transparency** — dateModified, security.txt, T&Cs, contact, privacy.
+Then it generates the files you need to fix the gaps — a drop-in `robots.txt`, `.well-known/security.txt`, MCP server card, and ACP manifest scaffolds.
+Companion to [`@kirkelabs/ai-legibility-scan`](https://github.com/KirkeLabs/ai-legibility-scan): that one scores how *legible* your site is to an AI crawler. This one scores how *agent-ready* it is once the crawler can read it.
+## Why?
+The strategic paper this tool is built on — [*The Web Becomes a Customs House*](https://www.kirkelabs.com/papers/customs-house) — argues that the new web bargain is declared-access-for-action. A page may be cited without being visited; a product may be transacted without a click. Existing "AI visibility" tools tell you you're invisible. This one is a free CLI that audits your *customs-house posture* and hands you the drop-in declarations to fix it.
+## Install
+Nothing to install — use `npx`:
+```bash
+npx @kirkelabs/agent-readiness-scan https://your-site.com
+```
+Or add it to a project:
+```bash
+npm i -D @kirkelabs/agent-readiness-scan
+```
+Requires Node.js ≥ 20.
+## Quickstart
+```bash
+# default scan
+npx @kirkelabs/agent-readiness-scan https://your-site.com
+# write artefacts to ./report
+npx @kirkelabs/agent-readiness-scan https://your-site.com --out ./report
+# machine-readable output for scripting
+npx @kirkelabs/agent-readiness-scan https://your-site.com --json
+```
+Files land in the output directory (default `./agent-readiness-out/`):
+| File / Directory | What it is |
+|---|---|
+| `score.json` | Machine-readable result — gate your CI on it |
+| `report.md` | Human-readable findings |
+| `scorecard.html` | Self-contained shareable scorecard |
+| `customs-declaration/robots.txt` | Drop-in robots.txt with per-AI-bot rules + Cloudflare Content Signals |
+| `customs-declaration/.well-known/security.txt` | RFC 9116 scaffold |
+| `customs-declaration/.well-known/mcp/server-card.json` | MCP server card scaffold |
+| `customs-declaration/.well-known/acp/manifest.json` | Agentic Commerce Protocol manifest scaffold |
+## How it scores
+Eight weighted dimensions, normalised to 0–100 and graded A–F:
+| # | Dimension | Weight | What it checks |
+|---|---|---|---|
+| 1 | Per-bot crawler policy | 10 | robots.txt names individual AI bots (GPTBot, ClaudeBot, OAI-SearchBot, PerplexityBot, Google-Extended, anthropic-ai, Claude-Web, ChatGPT-User, Claude-User, CCBot, Bytespider, Amazonbot, Applebot-Extended, meta-externalagent) |
+| 2 | Declared use-policy signals | 7 | Cloudflare Content Signals (search / ai-input / ai-train), `noai` / `noimageai` meta, `X-Robots-Tag` |
+| 3 | Bot-Auth readiness | 5 | `/.well-known/http-message-signatures-directory` (Web Bot Auth, IETF draft) |
+| 4 | MCP exposure | 7 | `/.well-known/mcp/server-card.json` + `/.well-known/oauth-protected-resource` with PKCE/S256 (NSA May-2026 guidance) |
+| 5 | Agentic-commerce manifests | 7 | `/.well-known/acp/manifest.json` (OpenAI/Stripe) and/or `/.well-known/ucp` (Google Universal Cart) |
+| 6 | Agent-actionable Product/Offer | 7 | Product/Offer JSON-LD completeness (price, availability, priceValidUntil-future, shippingDetails, acceptedPaymentMethod, hasMerchantReturnPolicy, aggregateRating) |
+| 7 | Brand identity corroboration | 8 | sameAs to registry-grade sources (Wikidata, Crunchbase, OpenCorporates, Companies House, SEC EDGAR, GLEIF, plus LinkedIn/GitHub) |
+| 8 | Source provenance & regulatory | 5 | dateModified/datePublished, security.txt, T&Cs, contact, privacy policy |
+Full rubric, thresholds and rationale: **[docs/METHODOLOGY.md](./docs/METHODOLOGY.md)**.
+## Use in CI
+The CLI exits non-zero when the score drops below 50:
+```yaml
+# .github/workflows/agent-readiness.yml
+- run: npx @kirkelabs/agent-readiness-scan https://staging.your-site.com
+```
+## Programmatic use
+```js
+import { scan } from '@kirkelabs/agent-readiness-scan';
+const result = await scan('https://your-site.com');
+console.log(result.score, result.grade);
+```
+## Limitations (read this)
+This tool measures **heuristic indicators** of agent-readiness. A high score makes a site easier for an AI agent to discover, declare access to, and act on — it is **not a guarantee** of agent uptake, citation, or transaction. The weights are informed by 2026 standards work (MCP, ACP, UCP, Web Bot Auth, Content Signals) but are judgement calls, documented openly in [docs/METHODOLOGY.md](./docs/METHODOLOGY.md). See also [`SECURITY.md`](./SECURITY.md).
+Most of the dimensions check standards that are *emerging*, not universal. A v0.1.0 score below 50 is normal today; a score above 80 puts you among the earliest customs-house operators. The bar will rise.
+## Audit, recon, fix — three steps to lift your score
+Once the scanner has graded your site, two prompt templates let Claude Code in your source repo do the rest:
+1. **[docs/RECON_PROMPT.md](./docs/RECON_PROMPT.md)** — read-only reconnaissance prompt that greps the codebase and returns a structured report of your framework, existing manifests, identity URLs, and routes.
+2. **[docs/PROMPT_TEMPLATE.md](./docs/PROMPT_TEMPLATE.md)** — the fix prompt. Fill in the placeholders informed by the recon, paste into a new Claude Code session to ship the customs declaration.
+## Companion tool
+See also [`@kirkelabs/ai-legibility-scan`](https://github.com/KirkeLabs/ai-legibility-scan) — scores how legible your page is to AI *crawlers* (the layer below this one). Together they cover the audit-recon-fix loop for both halves of the customs-house thesis: legibility + declared access.
+## Contributing
+Issues and PRs welcome — especially scoring false positives, new checks tracking emerging standards, and additional identity-registry coverage. See [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md).
+## Licence
+[MIT](./LICENSE) © 2026 Kirke Labs — Soleman El Gelawi and Steve Kirton. A genuine gift to the community — attribution appreciated, not required.
+— [www.kirkelabs.com](https://www.kirkelabs.com)

package/bin/cli.js ADDED Viewed

@@ -0,0 +1,172 @@
+#!/usr/bin/env node
+/**
+ * bin/cli.js — command-line entry point.
+ *
+ * Usage:
+ *   npx @kirkelabs/agent-readiness-scan https://example.com
+ *   npx @kirkelabs/agent-readiness-scan https://example.com --out ./report
+ *   npx @kirkelabs/agent-readiness-scan https://example.com --json
+ */
+import { writeFile, mkdir } from 'node:fs/promises';
+import { resolve, join, dirname } from 'node:path';
+import { scan } from '../src/index.js';
+import { renderScorecard } from '../src/scorecard.js';
+// ANSI escape sequences used to style terminal output; RESET clears any active styling.
+const RESET = '\x1b[0m';
+const DIM = '\x1b[2m';
+const BOLD = '\x1b[1m';
+const GREEN = '\x1b[32m';
+const YELLOW = '\x1b[33m';
+const RED = '\x1b[31m';
+const CYAN = '\x1b[36m';
+function parseArgs(argv) {
+  const args = { url: null, out: './agent-readiness-out', json: false };
+  for (let i = 2; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === '--out') args.out = argv[++i];
+    else if (a === '--json') args.json = true;
+    else if (a === '--help' || a === '-h') args.help = true;
+    else if (!a.startsWith('-')) args.url = a;
+  }
+  return args;
+}
+function help() {
+  console.log(`
+${BOLD}agent-readiness-scan${RESET} — is your brand ready for AI agents to act on it?
+${BOLD}Usage${RESET}
+  npx @kirkelabs/agent-readiness-scan <url> [options]
+${BOLD}Options${RESET}
+  --out <dir>      Output directory  (default: ./agent-readiness-out)
+  --json           Print machine-readable JSON to stdout (good for CI)
+  -h, --help       Show this help
+${BOLD}Outputs written to <dir>${RESET}
+  score.json                            Machine-readable result (CI-gateable)
+  report.md                             Human-readable report
+  scorecard.html                        Shareable static scorecard
+  customs-declaration/                  Drop-in policy files
+    robots.txt
+    .well-known/security.txt
+    .well-known/mcp/server-card.json
+    .well-known/acp/manifest.json
+MIT · Kirke Labs · www.kirkelabs.com
+`);
+}
+function color(level) {
+  return level === 'pass'
+    ? GREEN
+    : level === 'warn'
+      ? YELLOW
+      : level === 'fail'
+        ? RED
+        : DIM;
+}
+function bar(pct, width = 22) {
+  const fill = Math.round((pct / 100) * width);
+  return '█'.repeat(fill) + '░'.repeat(width - fill);
+}
+async function writeArtefact(outDir, relPath, content) {
+  const fullPath = join(outDir, relPath);
+  await mkdir(dirname(fullPath), { recursive: true });
+  await writeFile(fullPath, content);
+}
+async function main() {
+  const args = parseArgs(process.argv);
+  if (args.help || !args.url) {
+    help();
+    process.exit(args.url ? 0 : 1);
+  }
+  let url = args.url;
+  if (!/^https?:\/\//i.test(url)) url = 'https://' + url;
+  if (!args.json) {
+    console.log(`\n${CYAN}⟶  Scanning ${BOLD}${url}${RESET}${CYAN} for agent-readiness…${RESET}\n`);
+  }
+  const result = await scan(url);
+  if (args.json) {
+    process.stdout.write(JSON.stringify(result, null, 2) + '\n');
+    return;
+  }
+  if (!result.ok) {
+    console.error(`${RED}✗ Could not scan: ${result.error}${RESET}\n`);
+    process.exit(2);
+  }
+  const gColor =
+    result.grade === 'A' || result.grade === 'B'
+      ? GREEN
+      : result.grade === 'C'
+        ? YELLOW
+        : RED;
+  console.log(
+    `${BOLD}  Agent-Readiness Score: ${gColor}${result.score}/100  (${result.grade})${RESET}\n`,
+  );
+  for (const d of result.dimensions) {
+    const pct = Math.round((d.score / d.max) * 100);
+    const c = pct >= 70 ? GREEN : pct >= 40 ? YELLOW : RED;
+    console.log(
+      `  ${c}${bar(pct)}${RESET}  ${d.title} ${DIM}(${d.score}/${d.max}, weight ${d.weight})${RESET}`,
+    );
+    for (const f of d.findings) {
+      console.log(`     ${color(f.level)}•${RESET} ${f.msg}`);
+    }
+    console.log('');
+  }
+  // Write artefacts.
+  const outDir = resolve(args.out);
+  await mkdir(outDir, { recursive: true });
+  await writeFile(join(outDir, 'score.json'), JSON.stringify(result, null, 2));
+  await writeFile(join(outDir, 'report.md'), toMarkdown(result));
+  await writeFile(join(outDir, 'scorecard.html'), renderScorecard(result));
+  // Customs declaration files.
+  for (const [relPath, content] of Object.entries(result.generated)) {
+    await writeArtefact(join(outDir, 'customs-declaration'), relPath, content);
+  }
+  console.log(`${DIM}  Artefacts written to ${outDir}/${RESET}`);
+  console.log(
+    `${DIM}    score.json · report.md · scorecard.html · customs-declaration/${RESET}\n`,
+  );
+  console.log(
+    `${DIM}  Heuristic indicators, not a guarantee of agent action. See docs/METHODOLOGY.md${RESET}\n`,
+  );
+  process.exit(result.score >= 50 ? 0 : 3);
+}
+function toMarkdown(r) {
+  let md = `# Agent-Readiness Report\n\n`;
+  md += `**URL:** ${r.url}  \n**Score:** ${r.score}/100 (${r.grade})  \n`;
+  md += `**Scanned:** ${r.scannedAt}\n\n`;
+  md += `> Heuristic indicators of how ready this brand is for AI agents to discover, declare access to, and act on. Not a guarantee of agent uptake.\n\n`;
+  for (const d of r.dimensions) {
+    md += `## ${d.title} — ${d.score}/${d.max}\n\n_${d.why}_\n\n`;
+    for (const f of d.findings) md += `- **${f.level.toUpperCase()}** — ${f.msg}\n`;
+    md += `\n`;
+  }
+  md += `---\n\nGenerated by [\`@kirkelabs/agent-readiness-scan\`](https://github.com/KirkeLabs/agent-readiness-scan) — MIT. Built by Soleman El Gelawi (CTO, Kirke Labs), with Steve Kirton (www.kirkelabs.com) as a gift to the Algorand ecosystem.\n`;
+  return md;
+}
+// Run the CLI. Any rejection that escapes main() is logged and mapped to exit code 1.
+main().catch((e) => {
+  console.error(`${RED}Unexpected error:${RESET}`, e);
+  process.exit(1);
+});

package/package.json ADDED Viewed

@@ -0,0 +1,64 @@
+{
+  "name": "@kirkelabs/agent-readiness-scan",
+  "version": "0.1.0",
+  "description": "Audit a website's customs-house posture for AI agents. Scores 8 dimensions — crawler policy, bot auth, MCP/ACP exposure, agent-actionable Product/Offer, brand identity corroboration, regulatory transparency — and generates a drop-in customs declaration (robots.txt + .well-known/ manifests). A gift to the Algorand ecosystem from Kirke Labs.",
+  "type": "module",
+  "bin": {
+    "agent-readiness-scan": "bin/cli.js"
+  },
+  "exports": {
+    ".": "./src/index.js"
+  },
+  "files": [
+    "bin/",
+    "src/",
+    "LICENSE",
+    "README.md",
+    "AUTHORS",
+    "CITATION.cff"
+  ],
+  "scripts": {
+    "scan": "node bin/cli.js",
+    "test": "node --test \"test/*.test.js\"",
+    "lint": "eslint . --ext .js",
+    "format": "prettier --write \"**/*.{js,json,md}\""
+  },
+  "keywords": [
+    "ai-agents",
+    "mcp",
+    "acp",
+    "agentic-commerce",
+    "crawler-policy",
+    "web-bot-auth",
+    "content-signals",
+    "universal-cart",
+    "schema-org",
+    "structured-data",
+    "algorand",
+    "cli",
+    "nodejs"
+  ],
+  "author": "Soleman El Gelawi <soleman@kirkelabs.com> (https://www.kirkelabs.com)",
+  "contributors": [
+    "Steve Kirton <steve@kirkelabs.com> (https://www.kirkelabs.com)"
+  ],
+  "license": "MIT",
+  "homepage": "https://kirkelabs.github.io/agent-readiness-scan/",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/KirkeLabs/agent-readiness-scan.git"
+  },
+  "bugs": {
+    "url": "https://github.com/KirkeLabs/agent-readiness-scan/issues"
+  },
+  "engines": {
+    "node": ">=20"
+  },
+  "dependencies": {
+    "cheerio": "^1.0.0"
+  },
+  "devDependencies": {
+    "eslint": "^9.0.0",
+    "prettier": "^3.0.0"
+  }
+}

package/src/checks/01-per-bot-policy.js ADDED Viewed

@@ -0,0 +1,124 @@
+/**
+ * check 01 — Per-bot crawler policy
+ *
+ * The "customs house" foundation. Does robots.txt name the major AI bots
+ * individually with explicit allow/disallow rules, or is the site silent
+ * (default-permissive) about them? The article's central argument is that
+ * the new web bargain is declared-role-for-access — and the first
+ * declaration is your robots.txt directive per bot UA.
+ *
+ * Categories:
+ *   training        — GPTBot, ClaudeBot, Google-Extended, anthropic-ai,
+ *                     CCBot, Bytespider, Amazonbot, Applebot-Extended,
+ *                     meta-externalagent
+ *   grounding       — OAI-SearchBot, PerplexityBot, Claude-Web
+ *   user-directed   — ChatGPT-User, Claude-User
+ */
+export const meta = {
+  id: 'per-bot-policy',
+  title: 'Per-bot crawler policy',
+  weight: 10,
+  why: 'robots.txt is the customs declaration. A site that names individual AI bots with explicit rules is bargaining; one that is silent is default-permissive on every front.',
+};
+const TRAINING_BOTS = [
+  'GPTBot',
+  'ClaudeBot',
+  'Google-Extended',
+  'anthropic-ai',
+  'CCBot',
+  'Bytespider',
+  'Amazonbot',
+  'Applebot-Extended',
+  'meta-externalagent',
+];
+const GROUNDING_BOTS = ['OAI-SearchBot', 'PerplexityBot', 'Claude-Web'];
+const USER_DIRECTED_BOTS = ['ChatGPT-User', 'Claude-User'];
+export function run({ robotsTxt }) {
+  const findings = [];
+  if (!robotsTxt) {
+    findings.push({
+      level: 'fail',
+      msg: 'No robots.txt found. Every AI bot is implicitly allowed — the site has not declared any access posture.',
+    });
+    return { score: 0, max: 10, findings, detail: { named: [] } };
+  }
+  // Find every User-agent block name (case-insensitive matching against bot list).
+  const named = new Set();
+  const lines = robotsTxt.split(/\r?\n/);
+  for (const line of lines) {
+    const m = /^\s*user-agent\s*:\s*(.+?)\s*$/i.exec(line);
+    if (!m) continue;
+    const ua = m[1].trim();
+    if (ua === '*') continue;
+    for (const bot of [...TRAINING_BOTS, ...GROUNDING_BOTS, ...USER_DIRECTED_BOTS]) {
+      if (ua.toLowerCase() === bot.toLowerCase()) named.add(bot);
+    }
+  }
+  const namedTraining = TRAINING_BOTS.filter((b) => named.has(b));
+  const namedGrounding = GROUNDING_BOTS.filter((b) => named.has(b));
+  const namedUserDirected = USER_DIRECTED_BOTS.filter((b) => named.has(b));
+  const totalNamed = named.size;
+  // Score by breadth + category coverage.
+  let score;
+  if (totalNamed === 0) {
+    score = 3;
+    findings.push({
+      level: 'warn',
+      msg: 'robots.txt exists but names no AI bots individually. To an AI customs officer this looks like a default-permissive port.',
+    });
+  } else if (totalNamed <= 3) {
+    score = 6;
+    findings.push({
+      level: 'warn',
+      msg: `${totalNamed} AI bot(s) named explicitly (${[...named].join(', ')}). A start, but coverage of the major training + grounding + user-directed crawlers is thin.`,
+    });
+  } else if (totalNamed <= 7) {
+    score = 8;
+    findings.push({
+      level: 'pass',
+      msg: `${totalNamed} AI bots named explicitly. Good coverage breadth.`,
+    });
+  } else {
+    score = 10;
+    findings.push({
+      level: 'pass',
+      msg: `${totalNamed} AI bots named explicitly — comprehensive customs declaration.`,
+    });
+  }
+  // Bonus / penalty for category coverage.
+  const categories = [
+    namedTraining.length > 0,
+    namedGrounding.length > 0,
+    namedUserDirected.length > 0,
+  ].filter(Boolean).length;
+  if (totalNamed > 0 && categories < 2) {
+    findings.push({
+      level: 'warn',
+      msg: 'Rules cover only one of the three AI-bot categories (training / grounding / user-directed). Distinguish between them — a bot training a foundation model is not the same kind of visitor as one fetching on a user\'s behalf.',
+    });
+    score = Math.max(score - 1, 3);
+  } else if (categories === 3) {
+    findings.push({
+      level: 'pass',
+      msg: 'Rules cover training, grounding, and user-directed bots — the three customs declarations a 2026 AI port expects.',
+    });
+  }
+  return {
+    score,
+    max: 10,
+    findings,
+    detail: {
+      named: [...named],
+      categoryCoverage: { training: namedTraining, grounding: namedGrounding, userDirected: namedUserDirected },
+    },
+  };
+}

package/src/checks/02-declared-use-signals.js ADDED Viewed

@@ -0,0 +1,93 @@
+/**
+ * check 02 — Declared use-policy signals
+ *
+ * Beyond robots.txt allow/disallow, has the site declared *why* — i.e.
+ * which uses of the content are permitted? Cloudflare Content Signals
+ * separates search / ai-input / ai-train. The noai and noimageai meta
+ * tags / X-Robots-Tag headers signal the same intent at the page level.
+ *
+ * Default-permissive silence is the failure mode this check catches.
+ */
+export const meta = {
+  id: 'declared-use-signals',
+  title: 'Declared use-policy signals',
+  weight: 7,
+  why: 'Allow-or-disallow alone is binary. Real bargaining means saying which uses (search, ai-input, ai-train) you permit. Cloudflare Content Signals + noai meta tags + X-Robots-Tag turn silence into a declaration.',
+};
+export function run({ $, robotsTxt, headers }) {
+  const findings = [];
+  let score = 0;
+  const detail = {
+    contentSignal: { found: false, parts: [] },
+    noaiMeta: false,
+    noimageaiMeta: false,
+    xRobotsTagNoai: false,
+  };
+  // 1. Cloudflare Content Signals in robots.txt
+  if (robotsTxt) {
+    const csMatch = /^\s*content-signal\s*:\s*(.+)$/im.exec(robotsTxt);
+    if (csMatch) {
+      detail.contentSignal.found = true;
+      const parts = csMatch[1].split(',').map((p) => p.trim()).filter(Boolean);
+      detail.contentSignal.parts = parts;
+      score += 4;
+      findings.push({
+        level: 'pass',
+        msg: `Cloudflare Content Signals present: \`${csMatch[1].trim()}\`. Permitted-use declaration is on the record.`,
+      });
+      const declares = ['search', 'ai-input', 'ai-train'].filter((k) =>
+        parts.some((p) => p.startsWith(k + '=')),
+      );
+      if (declares.length === 3) {
+        score += 1;
+        findings.push({
+          level: 'pass',
+          msg: 'All three declarations present (search, ai-input, ai-train) — full triage.',
+        });
+      } else if (declares.length > 0) {
+        findings.push({
+          level: 'warn',
+          msg: `Content-Signal declares ${declares.length} of 3 signals (${declares.join(', ')}). The unstated signals default to permissive.`,
+        });
+      }
+    }
+  }
+  // 2. noai / noimageai meta tags
+  const robotsMeta = ($('meta[name="robots"]').attr('content') || '').toLowerCase();
+  if (/\bnoai\b/.test(robotsMeta)) {
+    detail.noaiMeta = true;
+    score += 1;
+    findings.push({ level: 'pass', msg: '`<meta name="robots" content="…noai…">` present — page declares no-AI-training.' });
+  }
+  if (/\bnoimageai\b/.test(robotsMeta)) {
+    detail.noimageaiMeta = true;
+    score += 1;
+    findings.push({ level: 'pass', msg: '`<meta name="robots" content="…noimageai…">` present.' });
+  }
+  // 3. X-Robots-Tag HTTP header
+  const xrt = (headers?.['x-robots-tag'] || '').toLowerCase();
+  if (/\bnoai\b/.test(xrt) || /\bnoimageai\b/.test(xrt)) {
+    detail.xRobotsTagNoai = true;
+    score += 1;
+    findings.push({
+      level: 'pass',
+      msg: `\`X-Robots-Tag\` header carries noai/noimageai directive(s) (\`${xrt}\`).`,
+    });
+  }
+  score = Math.min(score, 7);
+  if (score === 0) {
+    findings.push({
+      level: 'fail',
+      msg: 'No declared use-policy signals found. Add Cloudflare Content Signals to robots.txt, or `<meta name="robots" content="noai, noimageai">`, or an `X-Robots-Tag: noai` header — choose your declaration.',
+    });
+  }
+  return { score, max: 7, findings, detail };
+}