@vercel/agent-readability 0.3.0 → 0.4.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -146,7 +146,7 @@ function generateNotFoundMarkdown(path, options) {
146
146
  const sitemap = options?.sitemapUrl ?? "/sitemap.md";
147
147
  const index = options?.indexUrl ?? "/llms.txt";
148
148
  const fullContent = options?.fullContentUrl;
149
- const example = options?.exampleUrl ?? "/docs/functions.md";
149
+ const example = options?.exampleUrl ?? "/docs/functions";
150
150
  const base = options?.baseUrl ?? "";
151
151
  const fullContentLine = fullContent ? `
152
152
  3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export` : "";
@@ -161,8 +161,8 @@ The URL \`${path}\` does not exist.
161
161
 
162
162
  ## Tips for requesting documentation
163
163
 
164
- - For markdown responses, append \`.md\` to URLs (e.g., \`${example}\`)
165
- - Use \`Accept: text/markdown\` header for content negotiation
164
+ - Request the canonical URL (e.g., \`${example}\`) with \`Accept: text/markdown\`
165
+ - Prefer canonical page URLs in links and citations instead of \`.md\` variants
166
166
  `;
167
167
  }
168
168
  // Annotate the CommonJS export names for ESM import in node:
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["export { isAIAgent } from \"./detection\";\nexport { acceptsMarkdown, shouldServeMarkdown } from \"./negotiation\";\nexport { generateNotFoundMarkdown } from \"./not-found\";\nexport {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nexport type {\n\tDetectionMethod,\n\tDetectionResult,\n\tMinimalRequest,\n} from \"./types\";\nexport type { NotFoundOptions } from \"./not-found\";\nexport type { AcceptMarkdownOptions, ShouldServeMarkdownResult } from \"./negotiation\";\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot 
exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */\n\tfullContentUrl?: string;\n\t/** Example URL shown in the tips section. Default: `/docs/functions.md` */\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/**\n * Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n */\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions.md\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. 
**Browse the sitemap**: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. **Browse the full index**: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- For markdown responses, append \\`.md\\` to URLs (e.g., \\`${example}\\`)\n- Use \\`Accept: text/markdown\\` header for content negotiation\n`;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC
,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,4DAI7C,OAAO;AAAA;AAAA;AAGnE;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts","../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["export { isAIAgent } from \"./detection\";\nexport { acceptsMarkdown, shouldServeMarkdown } from \"./negotiation\";\nexport { generateNotFoundMarkdown } from \"./not-found\";\nexport {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nexport type {\n\tDetectionMethod,\n\tDetectionResult,\n\tMinimalRequest,\n\tOnDetectionCallback,\n\tOnDetectionInfo,\n} from \"./types\";\nexport type { NotFoundOptions } from \"./not-found\";\nexport type { AcceptMarkdownOptions, ShouldServeMarkdownResult } from \"./negotiation\";\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = 
[\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */\n\tfullContentUrl?: string;\n\t/** Example canonical URL shown in the tips section. Default: `/docs/functions` */\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/**\n * Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n */\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. 
**Browse the sitemap**: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. **Browse the full index**: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- Request the canonical URL (e.g., \\`${example}\\`) with \\`Accept: text/markdown\\`\n- Prefer canonical page URLs in links and citations instead of \\`.md\\` variants\n`;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAA
Y;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,uCAIlE,OAAO;AAAA;AAAA;AAG9C;","names":[]}
package/dist/index.d.cts CHANGED
@@ -15,6 +15,14 @@ interface MinimalRequest {
15
15
  get(name: string): string | null;
16
16
  };
17
17
  }
18
+ /** Info passed to onDetection callbacks across all adapters. */
19
+ type OnDetectionInfo = {
20
+ path: string;
21
+ method: DetectionMethod | "accept-header";
22
+ userAgent: string | null;
23
+ };
24
+ /** Fire-and-forget callback shared by Next.js, SvelteKit, and Nuxt adapters. */
25
+ type OnDetectionCallback = (info: OnDetectionInfo) => void | Promise<void>;
18
26
 
19
27
  /**
20
28
  * Detects AI agents from HTTP request headers.
@@ -52,7 +60,7 @@ interface NotFoundOptions {
52
60
  indexUrl?: string;
53
61
  /** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */
54
62
  fullContentUrl?: string;
55
- /** Example URL shown in the tips section. Default: `/docs/functions.md` */
63
+ /** Example canonical URL shown in the tips section. Default: `/docs/functions` */
56
64
  exampleUrl?: string;
57
65
  baseUrl?: string;
58
66
  }
@@ -83,4 +91,4 @@ declare const TRADITIONAL_BOT_PATTERNS: readonly string[];
83
91
  */
84
92
  declare const BOT_LIKE_REGEX: RegExp;
85
93
 
86
- export { AI_AGENT_UA_PATTERNS, type AcceptMarkdownOptions, BOT_LIKE_REGEX, type DetectionMethod, type DetectionResult, type MinimalRequest, type NotFoundOptions, SIGNATURE_AGENT_DOMAINS, type ShouldServeMarkdownResult, TRADITIONAL_BOT_PATTERNS, acceptsMarkdown, generateNotFoundMarkdown, isAIAgent, shouldServeMarkdown };
94
+ export { AI_AGENT_UA_PATTERNS, type AcceptMarkdownOptions, BOT_LIKE_REGEX, type DetectionMethod, type DetectionResult, type MinimalRequest, type NotFoundOptions, type OnDetectionCallback, type OnDetectionInfo, SIGNATURE_AGENT_DOMAINS, type ShouldServeMarkdownResult, TRADITIONAL_BOT_PATTERNS, acceptsMarkdown, generateNotFoundMarkdown, isAIAgent, shouldServeMarkdown };
package/dist/index.d.ts CHANGED
@@ -15,6 +15,14 @@ interface MinimalRequest {
15
15
  get(name: string): string | null;
16
16
  };
17
17
  }
18
+ /** Info passed to onDetection callbacks across all adapters. */
19
+ type OnDetectionInfo = {
20
+ path: string;
21
+ method: DetectionMethod | "accept-header";
22
+ userAgent: string | null;
23
+ };
24
+ /** Fire-and-forget callback shared by Next.js, SvelteKit, and Nuxt adapters. */
25
+ type OnDetectionCallback = (info: OnDetectionInfo) => void | Promise<void>;
18
26
 
19
27
  /**
20
28
  * Detects AI agents from HTTP request headers.
@@ -52,7 +60,7 @@ interface NotFoundOptions {
52
60
  indexUrl?: string;
53
61
  /** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */
54
62
  fullContentUrl?: string;
55
- /** Example URL shown in the tips section. Default: `/docs/functions.md` */
63
+ /** Example canonical URL shown in the tips section. Default: `/docs/functions` */
56
64
  exampleUrl?: string;
57
65
  baseUrl?: string;
58
66
  }
@@ -83,4 +91,4 @@ declare const TRADITIONAL_BOT_PATTERNS: readonly string[];
83
91
  */
84
92
  declare const BOT_LIKE_REGEX: RegExp;
85
93
 
86
- export { AI_AGENT_UA_PATTERNS, type AcceptMarkdownOptions, BOT_LIKE_REGEX, type DetectionMethod, type DetectionResult, type MinimalRequest, type NotFoundOptions, SIGNATURE_AGENT_DOMAINS, type ShouldServeMarkdownResult, TRADITIONAL_BOT_PATTERNS, acceptsMarkdown, generateNotFoundMarkdown, isAIAgent, shouldServeMarkdown };
94
+ export { AI_AGENT_UA_PATTERNS, type AcceptMarkdownOptions, BOT_LIKE_REGEX, type DetectionMethod, type DetectionResult, type MinimalRequest, type NotFoundOptions, type OnDetectionCallback, type OnDetectionInfo, SIGNATURE_AGENT_DOMAINS, type ShouldServeMarkdownResult, TRADITIONAL_BOT_PATTERNS, acceptsMarkdown, generateNotFoundMarkdown, isAIAgent, shouldServeMarkdown };
package/dist/index.js CHANGED
@@ -113,7 +113,7 @@ function generateNotFoundMarkdown(path, options) {
113
113
  const sitemap = options?.sitemapUrl ?? "/sitemap.md";
114
114
  const index = options?.indexUrl ?? "/llms.txt";
115
115
  const fullContent = options?.fullContentUrl;
116
- const example = options?.exampleUrl ?? "/docs/functions.md";
116
+ const example = options?.exampleUrl ?? "/docs/functions";
117
117
  const base = options?.baseUrl ?? "";
118
118
  const fullContentLine = fullContent ? `
119
119
  3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export` : "";
@@ -128,8 +128,8 @@ The URL \`${path}\` does not exist.
128
128
 
129
129
  ## Tips for requesting documentation
130
130
 
131
- - For markdown responses, append \`.md\` to URLs (e.g., \`${example}\`)
132
- - Use \`Accept: text/markdown\` header for content negotiation
131
+ - Request the canonical URL (e.g., \`${example}\`) with \`Accept: text/markdown\`
132
+ - Prefer canonical page URLs in links and citations instead of \`.md\` variants
133
133
  `;
134
134
  }
135
135
  export {
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. 
Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */\n\tfullContentUrl?: string;\n\t/** Example URL shown in the tips section. Default: `/docs/functions.md` */\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/**\n * Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n */\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions.md\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. 
**Browse the sitemap**: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. **Browse the full index**: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- For markdown responses, append \\`.md\\` to URLs (e.g., \\`${example}\\`)\n- Use \\`Accept: text/markdown\\` header for content negotiation\n`;\n}\n"],"mappings":";AAKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,O
AAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,4DAI7C,OAAO;AAAA;AAAA;AAGnE;","names":[]}
1
+ {"version":3,"sources":["../src/patterns.ts","../src/detection.ts","../src/negotiation.ts","../src/not-found.ts"],"sourcesContent":["/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. 
Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n","export interface NotFoundOptions {\n\tsitemapUrl?: string;\n\tindexUrl?: string;\n\t/** URL for full content export (e.g. /llms-full.txt). Adds a 3rd discovery link when set. */\n\tfullContentUrl?: string;\n\t/** Example canonical URL shown in the tips section. Default: `/docs/functions` */\n\texampleUrl?: string;\n\tbaseUrl?: string;\n}\n\n/**\n * Generates a markdown body for missing pages with links to discovery endpoints.\n * Return with a 200 status (agents discard 404 response bodies).\n */\nexport function generateNotFoundMarkdown(path: string, options?: NotFoundOptions): string {\n\tconst sitemap = options?.sitemapUrl ?? \"/sitemap.md\";\n\tconst index = options?.indexUrl ?? \"/llms.txt\";\n\tconst fullContent = options?.fullContentUrl;\n\tconst example = options?.exampleUrl ?? \"/docs/functions\";\n\tconst base = options?.baseUrl ?? \"\";\n\n\tconst fullContentLine = fullContent\n\t\t? `\\n3. **View the full content**: [${fullContent}](${base}${fullContent}) - Full content export`\n\t\t: \"\";\n\n\treturn `# Page Not Found\n\nThe URL \\`${path}\\` does not exist.\n\n## How to find the correct page\n\n1. 
**Browse the sitemap**: [${sitemap}](${base}${sitemap}) - A structured index of all pages\n2. **Browse the full index**: [${index}](${base}${index}) - Complete documentation index${fullContentLine}\n\n## Tips for requesting documentation\n\n- Request the canonical URL (e.g., \\`${example}\\`) with \\`Accept: text/markdown\\`\n- Prefer canonical page URLs in links and citations instead of \\`.md\\` variants\n`;\n}\n"],"mappings":";AAKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,S
AC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AC/BO,SAAS,yBAAyB,MAAc,SAAmC;AACzF,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,QAAQ,SAAS,YAAY;AACnC,QAAM,cAAc,SAAS;AAC7B,QAAM,UAAU,SAAS,cAAc;AACvC,QAAM,OAAO,SAAS,WAAW;AAEjC,QAAM,kBAAkB,cACrB;AAAA,iCAAoC,WAAW,KAAK,IAAI,GAAG,WAAW,4BACtE;AAEH,SAAO;AAAA;AAAA,YAEI,IAAI;AAAA;AAAA;AAAA;AAAA,8BAIc,OAAO,KAAK,IAAI,GAAG,OAAO;AAAA,iCACvB,KAAK,KAAK,IAAI,GAAG,KAAK,mCAAmC,eAAe;AAAA;AAAA;AAAA;AAAA,uCAIlE,OAAO;AAAA;AAAA;AAG9C;","names":[]}
@@ -125,6 +125,16 @@ function acceptsMarkdown(request, options) {
125
125
  const lowerAccept = accept.toLowerCase();
126
126
  return types.some((type) => lowerAccept.includes(type));
127
127
  }
128
+ function shouldServeMarkdown(request, options) {
129
+ const detection = isAIAgent(request);
130
+ if (detection.detected) {
131
+ return { serve: true, reason: "agent", detection };
132
+ }
133
+ if (acceptsMarkdown(request, options)) {
134
+ return { serve: true, reason: "accept-header", detection };
135
+ }
136
+ return { serve: false, reason: null, detection };
137
+ }
128
138
 
129
139
  // src/next/index.ts
130
140
  function withAgentReadability(options, handler) {
@@ -132,18 +142,20 @@ function withAgentReadability(options, handler) {
132
142
  const { pathname } = req.nextUrl;
133
143
  const prefix = options.docsPrefix ?? "/docs";
134
144
  if (pathname.startsWith(prefix)) {
135
- const result = isAIAgent(req);
136
- const markdownAccepted = acceptsMarkdown(req);
137
- if (result.detected || markdownAccepted) {
145
+ const { serve, detection } = shouldServeMarkdown(req);
146
+ if (serve) {
138
147
  if (options.onDetection) {
139
- const method = result.detected ? result.method : "accept-header";
140
- const detection = options.onDetection({
141
- path: pathname,
142
- method,
143
- userAgent: req.headers.get("user-agent")
144
- });
145
- if (detection instanceof Promise) {
146
- event.waitUntil(detection);
148
+ try {
149
+ const method = detection.detected ? detection.method : "accept-header";
150
+ const p = options.onDetection({
151
+ path: pathname,
152
+ method,
153
+ userAgent: req.headers.get("user-agent")
154
+ });
155
+ if (p instanceof Promise) {
156
+ event.waitUntil(p);
157
+ }
158
+ } catch {
147
159
  }
148
160
  }
149
161
  const rewriteUrl = req.nextUrl.clone();
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/next/index.ts","../../src/patterns.ts","../../src/detection.ts","../../src/negotiation.ts"],"sourcesContent":["import type { NextFetchEvent, NextRequest } from \"next/server\";\nimport { NextResponse } from \"next/server\";\nimport { isAIAgent } from \"../detection\";\nimport { acceptsMarkdown } from \"../negotiation\";\nimport type { DetectionMethod } from \"../types\";\n\nexport interface AgentReadabilityOptions {\n\t/** URL prefix to intercept. Default: '/docs' */\n\tdocsPrefix?: string;\n\t/** Maps request path to markdown route */\n\trewrite: (pathname: string) => string;\n\t/** Fire-and-forget callback. Async returns are passed to event.waitUntil(). */\n\tonDetection?: (info: {\n\t\tpath: string;\n\t\tmethod: DetectionMethod | \"accept-header\";\n\t\tuserAgent: string | null;\n\t}) => void | Promise<void>;\n}\n\nexport type MiddlewareHandler = (\n\treq: NextRequest,\n\tevent: NextFetchEvent,\n) => NextResponse | Response | Promise<NextResponse | Response>;\n\n/**\n * Higher-order function that wraps Next.js middleware with AI agent detection.\n * Detected AI agents or requests with Accept: text/markdown are rewritten\n * to the markdown route returned by `options.rewrite()`.\n *\n * Composes with existing middleware:\n * ```ts\n * export default withAgentReadability(\n * { rewrite: (p) => `/md${p}` },\n * (req, event) => existingMiddleware(req, event),\n * )\n * ```\n */\nexport function withAgentReadability(\n\toptions: AgentReadabilityOptions,\n\thandler?: MiddlewareHandler,\n): MiddlewareHandler {\n\treturn async (req: NextRequest, event: NextFetchEvent) => {\n\t\tconst { pathname } = req.nextUrl;\n\t\tconst prefix = options.docsPrefix ?? \"/docs\";\n\n\t\tif (pathname.startsWith(prefix)) {\n\t\t\tconst result = isAIAgent(req);\n\t\t\tconst markdownAccepted = acceptsMarkdown(req);\n\n\t\t\tif (result.detected || markdownAccepted) {\n\t\t\t\tif (options.onDetection) {\n\t\t\t\t\tconst method = result.detected ? 
result.method : \"accept-header\";\n\t\t\t\t\tconst detection = options.onDetection({\n\t\t\t\t\t\tpath: pathname,\n\t\t\t\t\t\tmethod,\n\t\t\t\t\t\tuserAgent: req.headers.get(\"user-agent\"),\n\t\t\t\t\t});\n\t\t\t\t\tif (detection instanceof Promise) {\n\t\t\t\t\t\tevent.waitUntil(detection);\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tconst rewriteUrl = req.nextUrl.clone();\n\t\t\t\trewriteUrl.pathname = options.rewrite(pathname);\n\t\t\t\treturn NextResponse.rewrite(rewriteUrl);\n\t\t\t}\n\t\t}\n\n\t\tif (handler) return handler(req, event);\n\t\treturn NextResponse.next();\n\t};\n}\n\n/** Recommended matcher that excludes Next.js internals and static files. */\nexport const agentReadabilityMatcher = [\n\t\"/((?!_next|api|.*\\\\..*|favicon|manifest|robots|health|status).*)\",\n] as const;\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const 
SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,oBAA6B;;;ACItB,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAA
I,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;;;AHkBO,SAAS,qBACf,SACA,SACoB;AACpB,SAAO,OAAO,KAAkB,UAA0B;AACzD,UAAM,EAAE,SAAS,IAAI,IAAI;AACzB,UAAM,SAAS,QAAQ,cAAc;AAErC,QAAI,SAAS,WAAW,MAAM,GAAG;AAChC,YAAM,SAAS,UAAU,GAAG;AAC5B,YAAM,mBAAmB,gBAAgB,GAAG;AAE5C,UAAI,OAAO,YAAY,kBAAkB;AACxC,YAAI,QAAQ,aAAa;AACxB,gBAAM,SAAS,OAAO,WAAW,OAAO,SAAS;AACjD,gBAAM,YAAY,QAAQ,YAAY;AAAA,YACrC,MAAM;AAAA,YACN;AAAA,YACA,WAAW,IAAI,QAAQ,IAAI,YAAY;AAAA,UACxC,CAAC;AACD,cAAI,qBAAqB,SAAS;AACjC,kBAAM,UAAU,SAAS;AAAA,UAC1B;AAAA,QACD;AAEA,cAAM,aAAa,IAAI,QAAQ,MAAM;AACrC,mBAAW,WAAW,QAAQ,QAAQ,QAAQ;AAC9C,eAAO,2BAAa,QAAQ,UAAU;AAAA,MACvC;AAAA,IACD;AAEA,QAAI,QAAS,QAAO,QAAQ,KAAK,KAAK;AACtC,WAAO,2BAAa,KAAK;AAAA,EAC1B;AACD;AAGO,IAAM,0BAA0B;AAAA,EACtC;AACD;","names":[]}
1
+ {"version":3,"sources":["../../src/next/index.ts","../../src/patterns.ts","../../src/detection.ts","../../src/negotiation.ts"],"sourcesContent":["import type { NextFetchEvent, NextRequest } from \"next/server\";\nimport { NextResponse } from \"next/server\";\nimport { shouldServeMarkdown } from \"../negotiation\";\nimport type { OnDetectionCallback } from \"../types\";\n\nexport interface AgentReadabilityOptions {\n\t/** URL prefix to intercept. Default: '/docs' */\n\tdocsPrefix?: string;\n\t/** Maps request path to markdown route */\n\trewrite: (pathname: string) => string;\n\t/** Fire-and-forget callback. Async returns are passed to event.waitUntil(). */\n\tonDetection?: OnDetectionCallback;\n}\n\nexport type MiddlewareHandler = (\n\treq: NextRequest,\n\tevent: NextFetchEvent,\n) => NextResponse | Response | Promise<NextResponse | Response>;\n\n/**\n * Higher-order function that wraps Next.js middleware with AI agent detection.\n * Detected AI agents or requests with Accept: text/markdown are rewritten\n * to the markdown route returned by `options.rewrite()`.\n *\n * Composes with existing middleware:\n * ```ts\n * export default withAgentReadability(\n * { rewrite: (p) => `/md${p}` },\n * (req, event) => existingMiddleware(req, event),\n * )\n * ```\n */\nexport function withAgentReadability(\n\toptions: AgentReadabilityOptions,\n\thandler?: MiddlewareHandler,\n): MiddlewareHandler {\n\treturn async (req: NextRequest, event: NextFetchEvent) => {\n\t\tconst { pathname } = req.nextUrl;\n\t\tconst prefix = options.docsPrefix ?? \"/docs\";\n\n\t\tif (pathname.startsWith(prefix)) {\n\t\t\tconst { serve, detection } = shouldServeMarkdown(req);\n\n\t\t\tif (serve) {\n\t\t\t\tif (options.onDetection) {\n\t\t\t\t\ttry {\n\t\t\t\t\t\tconst method = detection.detected ? 
detection.method : \"accept-header\";\n\t\t\t\t\t\tconst p = options.onDetection({\n\t\t\t\t\t\t\tpath: pathname,\n\t\t\t\t\t\t\tmethod,\n\t\t\t\t\t\t\tuserAgent: req.headers.get(\"user-agent\"),\n\t\t\t\t\t\t});\n\t\t\t\t\t\tif (p instanceof Promise) {\n\t\t\t\t\t\t\tevent.waitUntil(p);\n\t\t\t\t\t\t}\n\t\t\t\t\t} catch {\n\t\t\t\t\t\t/* swallow sync errors — onDetection should never break serving */\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tconst rewriteUrl = req.nextUrl.clone();\n\t\t\t\trewriteUrl.pathname = options.rewrite(pathname);\n\t\t\t\treturn NextResponse.rewrite(rewriteUrl);\n\t\t\t}\n\t\t}\n\n\t\tif (handler) return handler(req, event);\n\t\treturn NextResponse.next();\n\t};\n}\n\n/** Recommended matcher that excludes Next.js internals and static files. */\nexport const agentReadabilityMatcher = [\n\t\"/((?!_next|api|.*\\\\..*|favicon|manifest|robots|health|status).*)\",\n] as const;\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data 
scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,oBAA6B;;;ACItB,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAA
I,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AHbO,SAAS,qBACf,SACA,SACoB;AACpB,SAAO,OAAO,KAAkB,UAA0B;AACzD,UAAM,EAAE,SAAS,IAAI,IAAI;AACzB,UAAM,SAAS,QAAQ,cAAc;AAErC,QAAI,SAAS,WAAW,MAAM,GAAG;AAChC,YAAM,EAAE,OAAO,UAAU,IAAI,oBAAoB,GAAG;AAEpD,UAAI,OAAO;AACV,YAAI,QAAQ,aAAa;AACxB,cAAI;AACH,kBAAM,SAAS,UAAU,WAAW,UAAU,SAAS;AACvD,kBAAM,IAAI,QAAQ,YAAY;AAAA,cAC7B,MAAM;AAAA,cACN;AAAA,cACA,WAAW,IAAI,QAAQ,IAAI,YAAY;AAAA,YACxC,CAAC;AACD,gBAAI,aAAa,SAAS;AACzB,oBAAM,UAAU,CAAC;AAAA,YAClB;AAAA,UACD,QAAQ;AAAA,UAER;AAAA,QACD;AAEA,cAAM,aAAa,IAAI,QAAQ,MAAM;AACrC,mBAAW,WAAW,QAAQ,QAAQ,QAAQ;AAC9C,eAAO,2BAAa,QAAQ,UAAU;AAAA,MACvC;AAAA,IACD;AAEA,QAAI,QAAS,QAAO,QAAQ,KAAK,KAAK;AACtC,WAAO,2BAAa,KAAK;AAAA,EAC1B;AACD;AAGO,IAAM,0BAA0B;AAAA,EACtC;AACD;","names":[]}
@@ -1,6 +1,14 @@
1
1
  import { NextRequest, NextFetchEvent, NextResponse } from 'next/server';
2
2
 
3
3
  type DetectionMethod = "ua-match" | "signature-agent" | "heuristic";
4
+ /** Info passed to onDetection callbacks across all adapters. */
5
+ type OnDetectionInfo = {
6
+ path: string;
7
+ method: DetectionMethod | "accept-header";
8
+ userAgent: string | null;
9
+ };
10
+ /** Fire-and-forget callback shared by Next.js, SvelteKit, and Nuxt adapters. */
11
+ type OnDetectionCallback = (info: OnDetectionInfo) => void | Promise<void>;
4
12
 
5
13
  interface AgentReadabilityOptions {
6
14
  /** URL prefix to intercept. Default: '/docs' */
@@ -8,11 +16,7 @@ interface AgentReadabilityOptions {
8
16
  /** Maps request path to markdown route */
9
17
  rewrite: (pathname: string) => string;
10
18
  /** Fire-and-forget callback. Async returns are passed to event.waitUntil(). */
11
- onDetection?: (info: {
12
- path: string;
13
- method: DetectionMethod | "accept-header";
14
- userAgent: string | null;
15
- }) => void | Promise<void>;
19
+ onDetection?: OnDetectionCallback;
16
20
  }
17
21
  type MiddlewareHandler = (req: NextRequest, event: NextFetchEvent) => NextResponse | Response | Promise<NextResponse | Response>;
18
22
  /**
@@ -1,6 +1,14 @@
1
1
  import { NextRequest, NextFetchEvent, NextResponse } from 'next/server';
2
2
 
3
3
  type DetectionMethod = "ua-match" | "signature-agent" | "heuristic";
4
+ /** Info passed to onDetection callbacks across all adapters. */
5
+ type OnDetectionInfo = {
6
+ path: string;
7
+ method: DetectionMethod | "accept-header";
8
+ userAgent: string | null;
9
+ };
10
+ /** Fire-and-forget callback shared by Next.js, SvelteKit, and Nuxt adapters. */
11
+ type OnDetectionCallback = (info: OnDetectionInfo) => void | Promise<void>;
4
12
 
5
13
  interface AgentReadabilityOptions {
6
14
  /** URL prefix to intercept. Default: '/docs' */
@@ -8,11 +16,7 @@ interface AgentReadabilityOptions {
8
16
  /** Maps request path to markdown route */
9
17
  rewrite: (pathname: string) => string;
10
18
  /** Fire-and-forget callback. Async returns are passed to event.waitUntil(). */
11
- onDetection?: (info: {
12
- path: string;
13
- method: DetectionMethod | "accept-header";
14
- userAgent: string | null;
15
- }) => void | Promise<void>;
19
+ onDetection?: OnDetectionCallback;
16
20
  }
17
21
  type MiddlewareHandler = (req: NextRequest, event: NextFetchEvent) => NextResponse | Response | Promise<NextResponse | Response>;
18
22
  /**
@@ -100,6 +100,16 @@ function acceptsMarkdown(request, options) {
100
100
  const lowerAccept = accept.toLowerCase();
101
101
  return types.some((type) => lowerAccept.includes(type));
102
102
  }
103
+ function shouldServeMarkdown(request, options) {
104
+ const detection = isAIAgent(request);
105
+ if (detection.detected) {
106
+ return { serve: true, reason: "agent", detection };
107
+ }
108
+ if (acceptsMarkdown(request, options)) {
109
+ return { serve: true, reason: "accept-header", detection };
110
+ }
111
+ return { serve: false, reason: null, detection };
112
+ }
103
113
 
104
114
  // src/next/index.ts
105
115
  function withAgentReadability(options, handler) {
@@ -107,18 +117,20 @@ function withAgentReadability(options, handler) {
107
117
  const { pathname } = req.nextUrl;
108
118
  const prefix = options.docsPrefix ?? "/docs";
109
119
  if (pathname.startsWith(prefix)) {
110
- const result = isAIAgent(req);
111
- const markdownAccepted = acceptsMarkdown(req);
112
- if (result.detected || markdownAccepted) {
120
+ const { serve, detection } = shouldServeMarkdown(req);
121
+ if (serve) {
113
122
  if (options.onDetection) {
114
- const method = result.detected ? result.method : "accept-header";
115
- const detection = options.onDetection({
116
- path: pathname,
117
- method,
118
- userAgent: req.headers.get("user-agent")
119
- });
120
- if (detection instanceof Promise) {
121
- event.waitUntil(detection);
123
+ try {
124
+ const method = detection.detected ? detection.method : "accept-header";
125
+ const p = options.onDetection({
126
+ path: pathname,
127
+ method,
128
+ userAgent: req.headers.get("user-agent")
129
+ });
130
+ if (p instanceof Promise) {
131
+ event.waitUntil(p);
132
+ }
133
+ } catch {
122
134
  }
123
135
  }
124
136
  const rewriteUrl = req.nextUrl.clone();
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/next/index.ts","../../src/patterns.ts","../../src/detection.ts","../../src/negotiation.ts"],"sourcesContent":["import type { NextFetchEvent, NextRequest } from \"next/server\";\nimport { NextResponse } from \"next/server\";\nimport { isAIAgent } from \"../detection\";\nimport { acceptsMarkdown } from \"../negotiation\";\nimport type { DetectionMethod } from \"../types\";\n\nexport interface AgentReadabilityOptions {\n\t/** URL prefix to intercept. Default: '/docs' */\n\tdocsPrefix?: string;\n\t/** Maps request path to markdown route */\n\trewrite: (pathname: string) => string;\n\t/** Fire-and-forget callback. Async returns are passed to event.waitUntil(). */\n\tonDetection?: (info: {\n\t\tpath: string;\n\t\tmethod: DetectionMethod | \"accept-header\";\n\t\tuserAgent: string | null;\n\t}) => void | Promise<void>;\n}\n\nexport type MiddlewareHandler = (\n\treq: NextRequest,\n\tevent: NextFetchEvent,\n) => NextResponse | Response | Promise<NextResponse | Response>;\n\n/**\n * Higher-order function that wraps Next.js middleware with AI agent detection.\n * Detected AI agents or requests with Accept: text/markdown are rewritten\n * to the markdown route returned by `options.rewrite()`.\n *\n * Composes with existing middleware:\n * ```ts\n * export default withAgentReadability(\n * { rewrite: (p) => `/md${p}` },\n * (req, event) => existingMiddleware(req, event),\n * )\n * ```\n */\nexport function withAgentReadability(\n\toptions: AgentReadabilityOptions,\n\thandler?: MiddlewareHandler,\n): MiddlewareHandler {\n\treturn async (req: NextRequest, event: NextFetchEvent) => {\n\t\tconst { pathname } = req.nextUrl;\n\t\tconst prefix = options.docsPrefix ?? \"/docs\";\n\n\t\tif (pathname.startsWith(prefix)) {\n\t\t\tconst result = isAIAgent(req);\n\t\t\tconst markdownAccepted = acceptsMarkdown(req);\n\n\t\t\tif (result.detected || markdownAccepted) {\n\t\t\t\tif (options.onDetection) {\n\t\t\t\t\tconst method = result.detected ? 
result.method : \"accept-header\";\n\t\t\t\t\tconst detection = options.onDetection({\n\t\t\t\t\t\tpath: pathname,\n\t\t\t\t\t\tmethod,\n\t\t\t\t\t\tuserAgent: req.headers.get(\"user-agent\"),\n\t\t\t\t\t});\n\t\t\t\t\tif (detection instanceof Promise) {\n\t\t\t\t\t\tevent.waitUntil(detection);\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tconst rewriteUrl = req.nextUrl.clone();\n\t\t\t\trewriteUrl.pathname = options.rewrite(pathname);\n\t\t\t\treturn NextResponse.rewrite(rewriteUrl);\n\t\t\t}\n\t\t}\n\n\t\tif (handler) return handler(req, event);\n\t\treturn NextResponse.next();\n\t};\n}\n\n/** Recommended matcher that excludes Next.js internals and static files. */\nexport const agentReadabilityMatcher = [\n\t\"/((?!_next|api|.*\\\\..*|favicon|manifest|robots|health|status).*)\",\n] as const;\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const 
SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n"],"mappings":";AACA,SAAS,oBAAoB;;;ACItB,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KA
AK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;;;AHkBO,SAAS,qBACf,SACA,SACoB;AACpB,SAAO,OAAO,KAAkB,UAA0B;AACzD,UAAM,EAAE,SAAS,IAAI,IAAI;AACzB,UAAM,SAAS,QAAQ,cAAc;AAErC,QAAI,SAAS,WAAW,MAAM,GAAG;AAChC,YAAM,SAAS,UAAU,GAAG;AAC5B,YAAM,mBAAmB,gBAAgB,GAAG;AAE5C,UAAI,OAAO,YAAY,kBAAkB;AACxC,YAAI,QAAQ,aAAa;AACxB,gBAAM,SAAS,OAAO,WAAW,OAAO,SAAS;AACjD,gBAAM,YAAY,QAAQ,YAAY;AAAA,YACrC,MAAM;AAAA,YACN;AAAA,YACA,WAAW,IAAI,QAAQ,IAAI,YAAY;AAAA,UACxC,CAAC;AACD,cAAI,qBAAqB,SAAS;AACjC,kBAAM,UAAU,SAAS;AAAA,UAC1B;AAAA,QACD;AAEA,cAAM,aAAa,IAAI,QAAQ,MAAM;AACrC,mBAAW,WAAW,QAAQ,QAAQ,QAAQ;AAC9C,eAAO,aAAa,QAAQ,UAAU;AAAA,MACvC;AAAA,IACD;AAEA,QAAI,QAAS,QAAO,QAAQ,KAAK,KAAK;AACtC,WAAO,aAAa,KAAK;AAAA,EAC1B;AACD;AAGO,IAAM,0BAA0B;AAAA,EACtC;AACD;","names":[]}
1
+ {"version":3,"sources":["../../src/next/index.ts","../../src/patterns.ts","../../src/detection.ts","../../src/negotiation.ts"],"sourcesContent":["import type { NextFetchEvent, NextRequest } from \"next/server\";\nimport { NextResponse } from \"next/server\";\nimport { shouldServeMarkdown } from \"../negotiation\";\nimport type { OnDetectionCallback } from \"../types\";\n\nexport interface AgentReadabilityOptions {\n\t/** URL prefix to intercept. Default: '/docs' */\n\tdocsPrefix?: string;\n\t/** Maps request path to markdown route */\n\trewrite: (pathname: string) => string;\n\t/** Fire-and-forget callback. Async returns are passed to event.waitUntil(). */\n\tonDetection?: OnDetectionCallback;\n}\n\nexport type MiddlewareHandler = (\n\treq: NextRequest,\n\tevent: NextFetchEvent,\n) => NextResponse | Response | Promise<NextResponse | Response>;\n\n/**\n * Higher-order function that wraps Next.js middleware with AI agent detection.\n * Detected AI agents or requests with Accept: text/markdown are rewritten\n * to the markdown route returned by `options.rewrite()`.\n *\n * Composes with existing middleware:\n * ```ts\n * export default withAgentReadability(\n * { rewrite: (p) => `/md${p}` },\n * (req, event) => existingMiddleware(req, event),\n * )\n * ```\n */\nexport function withAgentReadability(\n\toptions: AgentReadabilityOptions,\n\thandler?: MiddlewareHandler,\n): MiddlewareHandler {\n\treturn async (req: NextRequest, event: NextFetchEvent) => {\n\t\tconst { pathname } = req.nextUrl;\n\t\tconst prefix = options.docsPrefix ?? \"/docs\";\n\n\t\tif (pathname.startsWith(prefix)) {\n\t\t\tconst { serve, detection } = shouldServeMarkdown(req);\n\n\t\t\tif (serve) {\n\t\t\t\tif (options.onDetection) {\n\t\t\t\t\ttry {\n\t\t\t\t\t\tconst method = detection.detected ? 
detection.method : \"accept-header\";\n\t\t\t\t\t\tconst p = options.onDetection({\n\t\t\t\t\t\t\tpath: pathname,\n\t\t\t\t\t\t\tmethod,\n\t\t\t\t\t\t\tuserAgent: req.headers.get(\"user-agent\"),\n\t\t\t\t\t\t});\n\t\t\t\t\t\tif (p instanceof Promise) {\n\t\t\t\t\t\t\tevent.waitUntil(p);\n\t\t\t\t\t\t}\n\t\t\t\t\t} catch {\n\t\t\t\t\t\t/* swallow sync errors — onDetection should never break serving */\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tconst rewriteUrl = req.nextUrl.clone();\n\t\t\t\trewriteUrl.pathname = options.rewrite(pathname);\n\t\t\t\treturn NextResponse.rewrite(rewriteUrl);\n\t\t\t}\n\t\t}\n\n\t\tif (handler) return handler(req, event);\n\t\treturn NextResponse.next();\n\t};\n}\n\n/** Recommended matcher that excludes Next.js internals and static files. */\nexport const agentReadabilityMatcher = [\n\t\"/((?!_next|api|.*\\\\..*|favicon|manifest|robots|health|status).*)\",\n] as const;\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data 
scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n"],"mappings":";AACA,SAAS,oBAAoB;;;ACItB,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KA
AK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AHbO,SAAS,qBACf,SACA,SACoB;AACpB,SAAO,OAAO,KAAkB,UAA0B;AACzD,UAAM,EAAE,SAAS,IAAI,IAAI;AACzB,UAAM,SAAS,QAAQ,cAAc;AAErC,QAAI,SAAS,WAAW,MAAM,GAAG;AAChC,YAAM,EAAE,OAAO,UAAU,IAAI,oBAAoB,GAAG;AAEpD,UAAI,OAAO;AACV,YAAI,QAAQ,aAAa;AACxB,cAAI;AACH,kBAAM,SAAS,UAAU,WAAW,UAAU,SAAS;AACvD,kBAAM,IAAI,QAAQ,YAAY;AAAA,cAC7B,MAAM;AAAA,cACN;AAAA,cACA,WAAW,IAAI,QAAQ,IAAI,YAAY;AAAA,YACxC,CAAC;AACD,gBAAI,aAAa,SAAS;AACzB,oBAAM,UAAU,CAAC;AAAA,YAClB;AAAA,UACD,QAAQ;AAAA,UAER;AAAA,QACD;AAEA,cAAM,aAAa,IAAI,QAAQ,MAAM;AACrC,mBAAW,WAAW,QAAQ,QAAQ,QAAQ;AAC9C,eAAO,aAAa,QAAQ,UAAU;AAAA,MACvC;AAAA,IACD;AAEA,QAAI,QAAS,QAAO,QAAQ,KAAK,KAAK;AACtC,WAAO,aAAa,KAAK;AAAA,EAC1B;AACD;AAGO,IAAM,0BAA0B;AAAA,EACtC;AACD;","names":[]}