npm - @vercel/agent-readability - Versions diffs - 0.2.1 → 0.3.1 - Mend

@vercel/agent-readability 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/index.cjs CHANGED Viewed

@@ -146,7 +146,7 @@ function generateNotFoundMarkdown(path, options) {
   const sitemap = options?.sitemapUrl ?? "/sitemap.md";
   const index = options?.indexUrl ?? "/llms.txt";
   const fullContent = options?.fullContentUrl;
-  const example = options?.exampleUrl ?? "/docs/functions.md";
+  const example = options?.exampleUrl ?? "/docs/functions";
   const base = options?.baseUrl ?? "";
   const fullContentLine = fullContent ? `
 3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export` : "";
@@ -161,8 +161,8 @@ The URL \`${path}\` does not exist.
 ## Tips for requesting documentation
-- For markdown responses, append \`.md\` to URLs (e.g., \`${example}\`)
-- Use \`Accept: text/markdown\` header for content negotiation
+- Request the canonical URL (e.g., \`${example}\`) with \`Accept: text/markdown\`
+- Prefer canonical page URLs in links and citations instead of \`.md\` variants
 `;
 }
 // Annotate the CommonJS export names for ESM import in node:

package/dist/index.cjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../src/index.ts","../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["export { isAIAgent } from \"./detection\";\nexport { acceptsMarkdown, shouldServeMarkdown } from \"./negotiation\";\nexport { generateNotFoundMarkdown } from \"./not-found\";\nexport {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nexport type {\n\tDetectionMethod,\n\tDetectionResult,\n\tMinimalRequest,\n} from \"./types\";\nexport type { NotFoundOptions } from \"./not-found\";\nexport type { AcceptMarkdownOptions, ShouldServeMarkdownResult } from \"./negotiation\";\n","/*\n Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n /\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/\n Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n /\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/\n Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n /\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/\n Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n /\nexport const BOT_LIKE_REGEX: RegExp = /bot\|agent\|fetch\|crawl\|spider\|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/\n Detects AI agents from HTTP request headers.\n \n Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n \n Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n /\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? \"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/\n Check if the request prefers markdown via the Accept header.\n /\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" \| \"accept-header\" \| null;\n\tdetection: DetectionResult;\n}\n\n/\n Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n /\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/* URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. /\n\tfullContentUrl?: string;\n\t/* Example URL shown in the tips section. Default: `/docs/functions~~.md~~` /\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/\n Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n /\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions~~.md~~\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. View the full content: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. Browse the sitemap: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. Browse the full index*: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- ~~For~~ ~~markdown~~ ~~responses,~~ ~~append~~ ~~\\`.md\\` to URLs~~ (e.g., \\`${example}\\`)~~\n-~~ ~~Use~~ \\`Accept: text/markdown~~\\`~~ ~~header~~ ~~for~~ ~~content~~ ~~negotiation~~\n`;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,~~4DAI7C~~,OAAO;AAAA;AAAA;~~AAGnE~~;","names":[]}
1	+ {"version":3,"sources":["../src/index.ts","../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["export { isAIAgent } from \"./detection\";\nexport { acceptsMarkdown, shouldServeMarkdown } from \"./negotiation\";\nexport { generateNotFoundMarkdown } from \"./not-found\";\nexport {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nexport type {\n\tDetectionMethod,\n\tDetectionResult,\n\tMinimalRequest,\n} from \"./types\";\nexport type { NotFoundOptions } from \"./not-found\";\nexport type { AcceptMarkdownOptions, ShouldServeMarkdownResult } from \"./negotiation\";\n","/*\n Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n /\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/\n Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n /\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/\n Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n /\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/\n Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n /\nexport const BOT_LIKE_REGEX: RegExp = /bot\|agent\|fetch\|crawl\|spider\|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/\n Detects AI agents from HTTP request headers.\n \n Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n \n Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n /\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? \"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/\n Check if the request prefers markdown via the Accept header.\n /\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" \| \"accept-header\" \| null;\n\tdetection: DetectionResult;\n}\n\n/\n Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n /\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/* URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. /\n\tfullContentUrl?: string;\n\t/* Example canonical URL shown in the tips section. Default: `/docs/functions` /\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/\n Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n /\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. View the full content: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. Browse the sitemap: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. Browse the full index*: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- Request the canonical URL (e.g., \\`${example}\\`) with \\`Accept: text/markdown\\`\n- Prefer canonical page URLs in links and citations instead of \\`.md\\` variants\n`;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,uCAIlE,OAAO;AAAA;AAAA;AAG9C;","names":[]}

package/dist/index.d.cts CHANGED Viewed

@@ -52,7 +52,7 @@ interface NotFoundOptions {
     indexUrl?: string;
     /** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */
     fullContentUrl?: string;
-    /** Example URL shown in the tips section. Default: `/docs/functions.md` */
+    /** Example canonical URL shown in the tips section. Default: `/docs/functions` */
     exampleUrl?: string;
     baseUrl?: string;
 }

package/dist/index.d.ts CHANGED Viewed

@@ -52,7 +52,7 @@ interface NotFoundOptions {
     indexUrl?: string;
     /** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */
     fullContentUrl?: string;
-    /** Example URL shown in the tips section. Default: `/docs/functions.md` */
+    /** Example canonical URL shown in the tips section. Default: `/docs/functions` */
     exampleUrl?: string;
     baseUrl?: string;
 }

package/dist/index.js CHANGED Viewed

@@ -113,7 +113,7 @@ function generateNotFoundMarkdown(path, options) {
   const sitemap = options?.sitemapUrl ?? "/sitemap.md";
   const index = options?.indexUrl ?? "/llms.txt";
   const fullContent = options?.fullContentUrl;
-  const example = options?.exampleUrl ?? "/docs/functions.md";
+  const example = options?.exampleUrl ?? "/docs/functions";
   const base = options?.baseUrl ?? "";
   const fullContentLine = fullContent ? `
 3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export` : "";
@@ -128,8 +128,8 @@ The URL \`${path}\` does not exist.
 ## Tips for requesting documentation
-- For markdown responses, append \`.md\` to URLs (e.g., \`${example}\`)
-- Use \`Accept: text/markdown\` header for content negotiation
+- Request the canonical URL (e.g., \`${example}\`) with \`Accept: text/markdown\`
+- Prefer canonical page URLs in links and citations instead of \`.md\` variants
 `;
 }
 export {

package/dist/index.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["/*\n Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n /\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/\n Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n /\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/\n Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n /\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/\n Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n /\nexport const BOT_LIKE_REGEX: RegExp = /bot\|agent\|fetch\|crawl\|spider\|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/\n Detects AI agents from HTTP request headers.\n \n Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n \n Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n /\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? \"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/\n Check if the request prefers markdown via the Accept header.\n /\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" \| \"accept-header\" \| null;\n\tdetection: DetectionResult;\n}\n\n/\n Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n /\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/* URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. /\n\tfullContentUrl?: string;\n\t/* Example URL shown in the tips section. Default: `/docs/functions~~.md~~` /\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/\n Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n /\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions~~.md~~\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. View the full content: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. Browse the sitemap: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. Browse the full index*: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- ~~For~~ ~~markdown~~ ~~responses,~~ ~~append~~ ~~\\`.md\\` to URLs~~ (e.g., \\`${example}\\`)~~\n-~~ ~~Use~~ \\`Accept: text/markdown~~\\`~~ ~~header~~ ~~for~~ ~~content~~ ~~negotiation~~\n`;\n}\n"],"mappings":";AAKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,~~4DAI7C~~,OAAO;AAAA;AAAA;~~AAGnE~~;","names":[]}
1	+ {"version":3,"sources":["../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["/*\n Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n /\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/\n Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n /\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/\n Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n /\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/\n Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n /\nexport const BOT_LIKE_REGEX: RegExp = /bot\|agent\|fetch\|crawl\|spider\|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/\n Detects AI agents from HTTP request headers.\n \n Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n \n Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n /\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? \"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/\n Check if the request prefers markdown via the Accept header.\n /\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" \| \"accept-header\" \| null;\n\tdetection: DetectionResult;\n}\n\n/\n Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n /\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/* URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. /\n\tfullContentUrl?: string;\n\t/* Example canonical URL shown in the tips section. Default: `/docs/functions` /\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/\n Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n /\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. View the full content: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. Browse the sitemap: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. Browse the full index*: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- Request the canonical URL (e.g., \\`${example}\\`) with \\`Accept: text/markdown\\`\n- Prefer canonical page URLs in links and citations instead of \\`.md\\` variants\n`;\n}\n"],"mappings":";AAKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,uCAIlE,OAAO;AAAA;AAAA;AAG9C;","names":[]}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@vercel/agent-readability",
-  "version": "0.2.1",
+  "version": "0.3.1",
   "description": "Detect AI agents. Serve them markdown. Audit your site against the Agent Readability Spec.",
   "type": "module",
   "main": "./dist/index.cjs",

package/skill/SKILL.md CHANGED Viewed

@@ -129,7 +129,7 @@ Add JSON-LD to pages with Schema.org types (Article, TechArticle, WebPage):
 Add to `<head>` on all pages that have markdown versions:
 ```html
-<link rel="alternate" type="text/markdown" href="/docs/page.md">
+<link rel="alternate" type="text/markdown" href="/docs/page">
 ```
 ### 7. Add frontmatter to markdown responses
@@ -155,7 +155,7 @@ Verify these AI bots are not blocked: GPTBot, ClaudeBot, CCBot, Google-Extended.
 npx @vercel/agent-readability audit https://your-site.com
 ```
-The audit scores your site on 16 checks across discovery, content delivery, and HTML quality. Failed checks include fix suggestions.
+The audit scores your site on 25 checks across reachability, discovery, content delivery, and HTML quality. Failed checks include fix suggestions.
 ### 10. Add CI check (optional)
@@ -167,5 +167,6 @@ The audit scores your site on 16 checks across discovery, content delivery, and
 ## Notes
 - Set `Vary: Accept` on all markdown responses for correct CDN caching
-- For missing pages, return 200 with markdown body (agents discard 404 bodies)
+- For missing pages, return 200 with markdown body on the canonical URL (agents discard 404 bodies)
+- Prefer canonical page URLs and negotiate markdown with `Accept: text/markdown` instead of exposing `.md` page URLs
 - The `onDetection` callback in the Next.js adapter runs via `event.waitUntil()`