@vercel/agent-readability 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,170 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/nuxt/index.ts
21
+ var nuxt_exports = {};
22
+ __export(nuxt_exports, {
23
+ defineAgentMiddleware: () => defineAgentMiddleware
24
+ });
25
+ module.exports = __toCommonJS(nuxt_exports);
26
+ var import_h3 = require("h3");
27
+
28
+ // src/patterns.ts
29
+ var AI_AGENT_UA_PATTERNS = [
30
+ // Anthropic — https://support.claude.com/en/articles/8896518
31
+ "claudebot",
32
+ "claude-searchbot",
33
+ "claude-user",
34
+ "anthropic-ai",
35
+ "claude-web",
36
+ // OpenAI — https://platform.openai.com/docs/bots
37
+ "chatgpt",
38
+ "gptbot",
39
+ "oai-searchbot",
40
+ "openai",
41
+ // Google AI
42
+ "gemini",
43
+ "bard",
44
+ "google-cloudvertexbot",
45
+ "google-extended",
46
+ // Meta
47
+ "meta-externalagent",
48
+ "meta-externalfetcher",
49
+ "meta-webindexer",
50
+ // Search/Research AI
51
+ "perplexity",
52
+ "youbot",
53
+ "you.com",
54
+ "deepseekbot",
55
+ // Coding assistants
56
+ "cursor",
57
+ "github-copilot",
58
+ "codeium",
59
+ "tabnine",
60
+ "sourcegraph",
61
+ // Other AI agents / data scrapers
62
+ "cohere-ai",
63
+ "bytespider",
64
+ "amazonbot",
65
+ "ai2bot",
66
+ "diffbot",
67
+ "omgili",
68
+ "omgilibot"
69
+ ];
70
+ var SIGNATURE_AGENT_DOMAINS = ["chatgpt.com"];
71
+ var TRADITIONAL_BOT_PATTERNS = [
72
+ "googlebot",
73
+ "bingbot",
74
+ "yandexbot",
75
+ "baiduspider",
76
+ "duckduckbot",
77
+ "slurp",
78
+ "msnbot",
79
+ "facebot",
80
+ "twitterbot",
81
+ "linkedinbot",
82
+ "whatsapp",
83
+ "telegrambot",
84
+ "pingdom",
85
+ "uptimerobot",
86
+ "newrelic",
87
+ "datadog",
88
+ "statuspage",
89
+ "site24x7",
90
+ "applebot"
91
+ ];
92
+ var BOT_LIKE_REGEX = /bot|agent|fetch|crawl|spider|search/i;
93
+
94
+ // src/detection.ts
95
/**
 * Classifies a request as coming from an AI agent, based on its headers only.
 *
 * Checks run in order and the first hit wins:
 *   1. the User-Agent contains one of AI_AGENT_UA_PATTERNS      -> "ua-match"
 *   2. Signature-Agent contains one of SIGNATURE_AGENT_DOMAINS  -> "signature-agent"
 *   3. no Sec-Fetch-Mode header, a User-Agent that matches BOT_LIKE_REGEX,
 *      and no TRADITIONAL_BOT_PATTERNS entry in it               -> "heuristic"
 *
 * @param {{ headers: { get(name: string): string | null } }} request
 *   Anything exposing a `headers.get(name)` lookup.
 * @returns {{ detected: boolean, method: "ua-match" | "signature-agent" | "heuristic" | null }}
 *   `method` is null exactly when `detected` is false.
 */
function isAIAgent(request) {
  const { headers } = request;
  const ua = (headers.get("user-agent") ?? "").toLowerCase();
  const hasUA = ua !== "";
  const uaContains = (needle) => ua.includes(needle);

  // Layer 1: known AI agent substrings in the User-Agent.
  if (hasUA && AI_AGENT_UA_PATTERNS.some(uaContains)) {
    return { detected: true, method: "ua-match" };
  }

  // Layer 2: Signature-Agent header naming a known AI service domain.
  const signature = headers.get("signature-agent");
  if (signature) {
    const normalizedSignature = signature.toLowerCase();
    for (const domain of SIGNATURE_AGENT_DOMAINS) {
      if (normalizedSignature.includes(domain)) {
        return { detected: true, method: "signature-agent" };
      }
    }
  }

  // Layer 3: a bot-like User-Agent without Sec-Fetch-Mode, unless it is one
  // of the traditional crawlers / preview bots / monitors on the exclusion list.
  const hasFetchMode = Boolean(headers.get("sec-fetch-mode"));
  const looksAutomated = !hasFetchMode && hasUA && BOT_LIKE_REGEX.test(ua);
  if (looksAutomated && !TRADITIONAL_BOT_PATTERNS.some(uaContains)) {
    return { detected: true, method: "heuristic" };
  }

  return { detected: false, method: null };
}
117
+
118
+ // src/negotiation.ts
119
// Media types treated as markdown when the caller supplies no `mediaTypes`.
const DEFAULT_MARKDOWN_TYPES = ["text/markdown", "text/x-markdown"];

/**
 * Reports whether the request's Accept header asks for markdown.
 *
 * The header is split into its comma-separated media ranges and each range is
 * checked on its own, so a range that names a markdown type with a quality
 * value of zero (`text/markdown;q=0`, an explicit "not acceptable") does not
 * count as a request for markdown. Matching is case-insensitive on both the
 * header and the configured media types.
 *
 * @param {{ headers: { get(name: string): string | null } }} request
 *   Anything exposing a `headers.get(name)` lookup.
 * @param {{ mediaTypes?: string[] }} [options]
 *   `mediaTypes` overrides DEFAULT_MARKDOWN_TYPES.
 * @returns {boolean} true when at least one non-refused media range mentions
 *   one of the markdown media types; false when the header is absent or empty.
 */
function acceptsMarkdown(request, options) {
  const accept = request.headers.get("accept");
  if (!accept) return false;

  // The header is lowercased below, so the needles must be lowercased too.
  const wanted = (options?.mediaTypes ?? DEFAULT_MARKDOWN_TYPES).map((type) =>
    type.toLowerCase(),
  );

  return accept
    .toLowerCase()
    .split(",")
    .some((range) => {
      if (!wanted.some((type) => range.includes(type))) return false;
      // A missing or unparsable q parameter is treated as acceptable.
      const weight = /;\s*q\s*=\s*([^;\s]*)/.exec(range);
      return weight === null || Number.parseFloat(weight[1]) !== 0;
    });
}
127
/**
 * Decides whether a request should receive markdown, and why.
 *
 * Agent detection takes priority: the Accept header is only consulted when
 * isAIAgent() did not flag the request.
 *
 * @param {{ headers: { get(name: string): string | null } }} request
 *   Passed unchanged to isAIAgent() and acceptsMarkdown().
 * @param {{ mediaTypes?: string[] }} [options] Forwarded to acceptsMarkdown().
 * @returns {{ serve: boolean, reason: "agent" | "accept-header" | null, detection: object }}
 *   `detection` is always the isAIAgent() result, whichever branch is taken.
 */
function shouldServeMarkdown(request, options) {
  const detection = isAIAgent(request);

  let reason = null;
  if (detection.detected) {
    reason = "agent";
  } else if (acceptsMarkdown(request, options)) {
    reason = "accept-header";
  }

  return { serve: reason !== null, reason, detection };
}
137
+
138
+ // src/nuxt/index.ts
139
/**
 * Builds an h3 event handler that serves markdown to AI agents and to clients
 * that ask for markdown via the Accept header.
 *
 * The handler returns undefined (letting later handlers deal with the request)
 * when the path is outside `docsPrefix`, when shouldServeMarkdown() says no,
 * or when `getMarkdown` yields null/undefined.
 *
 * @param {object} options
 * @param {string} [options.docsPrefix="/docs"] Path prefix to intercept.
 * @param {(pathname: string, event: object) => (string | Response | Promise<string | Response>)} options.getMarkdown
 *   Produces the markdown for a path. A Response is returned to the client as-is.
 * @param {(info: { path: string, method: string, userAgent: string | null }) => (void | Promise<void>)} [options.onDetection]
 *   Best-effort notification hook; it is never awaited and its failures are ignored.
 * @returns The event handler created by h3's defineEventHandler.
 */
function defineAgentMiddleware(options) {
  return (0, import_h3.defineEventHandler)(async (event) => {
    // event.path still carries the query string; only the path part takes
    // part in prefix matching and is handed to the callbacks.
    const pathname = event.path.split("?")[0];
    const prefix = options.docsPrefix ?? "/docs";
    if (!pathname.startsWith(prefix)) return;

    const { serve, detection } = shouldServeMarkdown({ headers: event.headers });
    if (!serve) return;

    if (options.onDetection) {
      try {
        const method = detection.detected ? detection.method : "accept-header";
        const pending = options.onDetection({
          path: pathname,
          method,
          userAgent: event.headers.get("user-agent"),
        });
        // Deliberately not awaited. Promise.resolve() adopts any thenable, so
        // a rejection from a non-native promise is swallowed as well instead
        // of surfacing as an unhandled rejection.
        Promise.resolve(pending).catch(() => {});
      } catch {
        // The hook is best-effort: a synchronous throw must not break the response.
      }
    }

    const content = await options.getMarkdown(pathname, event);
    // No markdown for this path: fall through to the next handler rather than
    // answering with an empty 200 text/markdown body.
    if (content == null) return;
    if (content instanceof Response) return content;

    return new Response(content, {
      headers: {
        "content-type": "text/markdown; charset=utf-8",
        // Every request header that took part in choosing this representation
        // is listed, so a shared cache cannot replay the markdown to a client
        // that would have received the regular page.
        vary: "Accept, User-Agent, Signature-Agent, Sec-Fetch-Mode",
      },
    });
  });
}
166
+ // Annotate the CommonJS export names for ESM import in node:
167
+ 0 && (module.exports = {
168
+ defineAgentMiddleware
169
+ });
170
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/nuxt/index.ts","../../src/patterns.ts","../../src/detection.ts","../../src/negotiation.ts"],"sourcesContent":["import type { H3Event } from \"h3\";\nimport { defineEventHandler } from \"h3\";\nimport { shouldServeMarkdown } from \"../negotiation\";\nimport type { OnDetectionCallback } from \"../types\";\n\nexport interface NuxtAgentReadabilityOptions {\n\t/** URL prefix to intercept. Default: '/docs' */\n\tdocsPrefix?: string;\n\t/** Returns markdown content for the given path. Can return a string or a full Response. */\n\tgetMarkdown: (pathname: string, event: H3Event) => string | Response | Promise<string | Response>;\n\t/** Fire-and-forget callback for analytics/logging. */\n\tonDetection?: OnDetectionCallback;\n}\n\n/**\n * Nuxt server middleware that detects AI agents and serves markdown.\n *\n * ```ts\n * // server/middleware/agent.ts\n * import { defineAgentMiddleware } from '@vercel/agent-readability/nuxt'\n *\n * export default defineAgentMiddleware({\n * getMarkdown: async (pathname) => {\n * const doc = await fetchDoc(pathname)\n * return doc.markdown\n * },\n * })\n * ```\n */\nexport function defineAgentMiddleware(options: NuxtAgentReadabilityOptions) {\n\treturn defineEventHandler(async (event: H3Event) => {\n\t\t// h3 event.path includes query string — strip it for prefix matching\n\t\tconst pathname = event.path.split(\"?\")[0];\n\t\tconst prefix = options.docsPrefix ?? \"/docs\";\n\n\t\tif (!pathname.startsWith(prefix)) return; // undefined = h3 cascades to next handler\n\n\t\tconst { serve, detection } = shouldServeMarkdown({ headers: event.headers });\n\t\tif (!serve) return;\n\n\t\t// Fire-and-forget — never block the response\n\t\tif (options.onDetection) {\n\t\t\ttry {\n\t\t\t\tconst method = detection.detected ? 
detection.method : \"accept-header\";\n\t\t\t\tconst p = options.onDetection({\n\t\t\t\t\tpath: pathname,\n\t\t\t\t\tmethod,\n\t\t\t\t\tuserAgent: event.headers.get(\"user-agent\"),\n\t\t\t\t});\n\t\t\t\tif (p instanceof Promise) p.catch(() => {});\n\t\t\t} catch {\n\t\t\t\t/* swallow sync errors */\n\t\t\t}\n\t\t}\n\n\t\tconst content = await options.getMarkdown(pathname, event);\n\t\tif (content instanceof Response) return content;\n\n\t\treturn new Response(content, {\n\t\t\theaders: { \"content-type\": \"text/markdown; charset=utf-8\", vary: \"Accept\" },\n\t\t});\n\t});\n}\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. 
Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,gBAAmC;;;ACI5B,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBA
AgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AHhBO,SAAS,sBAAsB,SAAsC;AAC3E,aAAO,8BAAmB,OAAO,UAAmB;AAEnD,UAAM,WAAW,MAAM,KAAK,MAAM,GAAG,EAAE,CAAC;AACxC,UAAM,SAAS,QAAQ,cAAc;AAErC,QAAI,CAAC,SAAS,WAAW,MAAM,EAAG;AAElC,UAAM,EAAE,OAAO,UAAU,IAAI,oBAAoB,EAAE,SAAS,MAAM,QAAQ,CAAC;AAC3E,QAAI,CAAC,MAAO;AAGZ,QAAI,QAAQ,aAAa;AACxB,UAAI;AACH,cAAM,SAAS,UAAU,WAAW,UAAU,SAAS;AACvD,cAAM,IAAI,QAAQ,YAAY;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,UACA,WAAW,MAAM,QAAQ,IAAI,YAAY;AAAA,QAC1C,CAAC;AACD,YAAI,aAAa,QAAS,GAAE,MAAM,MAAM;AAAA,QAAC,CAAC;AAAA,MAC3C,QAAQ;AAAA,MAER;AAAA,IACD;AAEA,UAAM,UAAU,MAAM,QAAQ,YAAY,UAAU,KAAK;AACzD,QAAI,mBAAmB,SAAU,QAAO;AAExC,WAAO,IAAI,SAAS,SAAS;AAAA,MAC5B,SAAS,EAAE,gBAAgB,gCAAgC,MAAM,SAAS;AAAA,IAC3E,CAAC;AAAA,EACF,CAAC;AACF;","names":[]}
@@ -0,0 +1,39 @@
1
+ import * as h3 from 'h3';
2
+ import { H3Event } from 'h3';
3
+
4
+ type DetectionMethod = "ua-match" | "signature-agent" | "heuristic";
5
+ /** Info passed to onDetection callbacks across all adapters. */
6
+ type OnDetectionInfo = {
7
+ path: string;
8
+ method: DetectionMethod | "accept-header";
9
+ userAgent: string | null;
10
+ };
11
+ /** Fire-and-forget callback shared by Next.js, SvelteKit, and Nuxt adapters. */
12
+ type OnDetectionCallback = (info: OnDetectionInfo) => void | Promise<void>;
13
+
14
+ interface NuxtAgentReadabilityOptions {
15
+ /** URL prefix to intercept. Default: '/docs' */
16
+ docsPrefix?: string;
17
+ /** Returns markdown content for the given path. Can return a string or a full Response. */
18
+ getMarkdown: (pathname: string, event: H3Event) => string | Response | Promise<string | Response>;
19
+ /** Fire-and-forget callback for analytics/logging. */
20
+ onDetection?: OnDetectionCallback;
21
+ }
22
+ /**
23
+ * Nuxt server middleware that detects AI agents and serves markdown.
24
+ *
25
+ * ```ts
26
+ * // server/middleware/agent.ts
27
+ * import { defineAgentMiddleware } from '@vercel/agent-readability/nuxt'
28
+ *
29
+ * export default defineAgentMiddleware({
30
+ * getMarkdown: async (pathname) => {
31
+ * const doc = await fetchDoc(pathname)
32
+ * return doc.markdown
33
+ * },
34
+ * })
35
+ * ```
36
+ */
37
+ declare function defineAgentMiddleware(options: NuxtAgentReadabilityOptions): h3.EventHandler<h3.EventHandlerRequest, Promise<Response | undefined>>;
38
+
39
+ export { type NuxtAgentReadabilityOptions, defineAgentMiddleware };
@@ -0,0 +1,39 @@
1
+ import * as h3 from 'h3';
2
+ import { H3Event } from 'h3';
3
+
4
+ type DetectionMethod = "ua-match" | "signature-agent" | "heuristic";
5
+ /** Info passed to onDetection callbacks across all adapters. */
6
+ type OnDetectionInfo = {
7
+ path: string;
8
+ method: DetectionMethod | "accept-header";
9
+ userAgent: string | null;
10
+ };
11
+ /** Fire-and-forget callback shared by Next.js, SvelteKit, and Nuxt adapters. */
12
+ type OnDetectionCallback = (info: OnDetectionInfo) => void | Promise<void>;
13
+
14
+ interface NuxtAgentReadabilityOptions {
15
+ /** URL prefix to intercept. Default: '/docs' */
16
+ docsPrefix?: string;
17
+ /** Returns markdown content for the given path. Can return a string or a full Response. */
18
+ getMarkdown: (pathname: string, event: H3Event) => string | Response | Promise<string | Response>;
19
+ /** Fire-and-forget callback for analytics/logging. */
20
+ onDetection?: OnDetectionCallback;
21
+ }
22
+ /**
23
+ * Nuxt server middleware that detects AI agents and serves markdown.
24
+ *
25
+ * ```ts
26
+ * // server/middleware/agent.ts
27
+ * import { defineAgentMiddleware } from '@vercel/agent-readability/nuxt'
28
+ *
29
+ * export default defineAgentMiddleware({
30
+ * getMarkdown: async (pathname) => {
31
+ * const doc = await fetchDoc(pathname)
32
+ * return doc.markdown
33
+ * },
34
+ * })
35
+ * ```
36
+ */
37
+ declare function defineAgentMiddleware(options: NuxtAgentReadabilityOptions): h3.EventHandler<h3.EventHandlerRequest, Promise<Response | undefined>>;
38
+
39
+ export { type NuxtAgentReadabilityOptions, defineAgentMiddleware };
@@ -0,0 +1,145 @@
1
+ // src/nuxt/index.ts
2
+ import { defineEventHandler } from "h3";
3
+
4
+ // src/patterns.ts
5
+ var AI_AGENT_UA_PATTERNS = [
6
+ // Anthropic — https://support.claude.com/en/articles/8896518
7
+ "claudebot",
8
+ "claude-searchbot",
9
+ "claude-user",
10
+ "anthropic-ai",
11
+ "claude-web",
12
+ // OpenAI — https://platform.openai.com/docs/bots
13
+ "chatgpt",
14
+ "gptbot",
15
+ "oai-searchbot",
16
+ "openai",
17
+ // Google AI
18
+ "gemini",
19
+ "bard",
20
+ "google-cloudvertexbot",
21
+ "google-extended",
22
+ // Meta
23
+ "meta-externalagent",
24
+ "meta-externalfetcher",
25
+ "meta-webindexer",
26
+ // Search/Research AI
27
+ "perplexity",
28
+ "youbot",
29
+ "you.com",
30
+ "deepseekbot",
31
+ // Coding assistants
32
+ "cursor",
33
+ "github-copilot",
34
+ "codeium",
35
+ "tabnine",
36
+ "sourcegraph",
37
+ // Other AI agents / data scrapers
38
+ "cohere-ai",
39
+ "bytespider",
40
+ "amazonbot",
41
+ "ai2bot",
42
+ "diffbot",
43
+ "omgili",
44
+ "omgilibot"
45
+ ];
46
+ var SIGNATURE_AGENT_DOMAINS = ["chatgpt.com"];
47
+ var TRADITIONAL_BOT_PATTERNS = [
48
+ "googlebot",
49
+ "bingbot",
50
+ "yandexbot",
51
+ "baiduspider",
52
+ "duckduckbot",
53
+ "slurp",
54
+ "msnbot",
55
+ "facebot",
56
+ "twitterbot",
57
+ "linkedinbot",
58
+ "whatsapp",
59
+ "telegrambot",
60
+ "pingdom",
61
+ "uptimerobot",
62
+ "newrelic",
63
+ "datadog",
64
+ "statuspage",
65
+ "site24x7",
66
+ "applebot"
67
+ ];
68
+ var BOT_LIKE_REGEX = /bot|agent|fetch|crawl|spider|search/i;
69
+
70
+ // src/detection.ts
71
/**
 * Classifies a request as coming from an AI agent, based on its headers only.
 *
 * Checks run in order and the first hit wins:
 *   1. the User-Agent contains one of AI_AGENT_UA_PATTERNS      -> "ua-match"
 *   2. Signature-Agent contains one of SIGNATURE_AGENT_DOMAINS  -> "signature-agent"
 *   3. no Sec-Fetch-Mode header, a User-Agent that matches BOT_LIKE_REGEX,
 *      and no TRADITIONAL_BOT_PATTERNS entry in it               -> "heuristic"
 *
 * @param {{ headers: { get(name: string): string | null } }} request
 *   Anything exposing a `headers.get(name)` lookup.
 * @returns {{ detected: boolean, method: "ua-match" | "signature-agent" | "heuristic" | null }}
 *   `method` is null exactly when `detected` is false.
 */
function isAIAgent(request) {
  const { headers } = request;
  const ua = (headers.get("user-agent") ?? "").toLowerCase();
  const hasUA = ua !== "";
  const uaContains = (needle) => ua.includes(needle);

  // Layer 1: known AI agent substrings in the User-Agent.
  if (hasUA && AI_AGENT_UA_PATTERNS.some(uaContains)) {
    return { detected: true, method: "ua-match" };
  }

  // Layer 2: Signature-Agent header naming a known AI service domain.
  const signature = headers.get("signature-agent");
  if (signature) {
    const normalizedSignature = signature.toLowerCase();
    for (const domain of SIGNATURE_AGENT_DOMAINS) {
      if (normalizedSignature.includes(domain)) {
        return { detected: true, method: "signature-agent" };
      }
    }
  }

  // Layer 3: a bot-like User-Agent without Sec-Fetch-Mode, unless it is one
  // of the traditional crawlers / preview bots / monitors on the exclusion list.
  const hasFetchMode = Boolean(headers.get("sec-fetch-mode"));
  const looksAutomated = !hasFetchMode && hasUA && BOT_LIKE_REGEX.test(ua);
  if (looksAutomated && !TRADITIONAL_BOT_PATTERNS.some(uaContains)) {
    return { detected: true, method: "heuristic" };
  }

  return { detected: false, method: null };
}
93
+
94
+ // src/negotiation.ts
95
// Media types treated as markdown when the caller supplies no `mediaTypes`.
const DEFAULT_MARKDOWN_TYPES = ["text/markdown", "text/x-markdown"];

/**
 * Reports whether the request's Accept header asks for markdown.
 *
 * The header is split into its comma-separated media ranges and each range is
 * checked on its own, so a range that names a markdown type with a quality
 * value of zero (`text/markdown;q=0`, an explicit "not acceptable") does not
 * count as a request for markdown. Matching is case-insensitive on both the
 * header and the configured media types.
 *
 * @param {{ headers: { get(name: string): string | null } }} request
 *   Anything exposing a `headers.get(name)` lookup.
 * @param {{ mediaTypes?: string[] }} [options]
 *   `mediaTypes` overrides DEFAULT_MARKDOWN_TYPES.
 * @returns {boolean} true when at least one non-refused media range mentions
 *   one of the markdown media types; false when the header is absent or empty.
 */
function acceptsMarkdown(request, options) {
  const accept = request.headers.get("accept");
  if (!accept) return false;

  // The header is lowercased below, so the needles must be lowercased too.
  const wanted = (options?.mediaTypes ?? DEFAULT_MARKDOWN_TYPES).map((type) =>
    type.toLowerCase(),
  );

  return accept
    .toLowerCase()
    .split(",")
    .some((range) => {
      if (!wanted.some((type) => range.includes(type))) return false;
      // A missing or unparsable q parameter is treated as acceptable.
      const weight = /;\s*q\s*=\s*([^;\s]*)/.exec(range);
      return weight === null || Number.parseFloat(weight[1]) !== 0;
    });
}
103
/**
 * Decides whether a request should receive markdown, and why.
 *
 * Agent detection takes priority: the Accept header is only consulted when
 * isAIAgent() did not flag the request.
 *
 * @param {{ headers: { get(name: string): string | null } }} request
 *   Passed unchanged to isAIAgent() and acceptsMarkdown().
 * @param {{ mediaTypes?: string[] }} [options] Forwarded to acceptsMarkdown().
 * @returns {{ serve: boolean, reason: "agent" | "accept-header" | null, detection: object }}
 *   `detection` is always the isAIAgent() result, whichever branch is taken.
 */
function shouldServeMarkdown(request, options) {
  const detection = isAIAgent(request);

  let reason = null;
  if (detection.detected) {
    reason = "agent";
  } else if (acceptsMarkdown(request, options)) {
    reason = "accept-header";
  }

  return { serve: reason !== null, reason, detection };
}
113
+
114
+ // src/nuxt/index.ts
115
/**
 * Builds an h3 event handler that serves markdown to AI agents and to clients
 * that ask for markdown via the Accept header.
 *
 * The handler returns undefined (letting later handlers deal with the request)
 * when the path is outside `docsPrefix`, when shouldServeMarkdown() says no,
 * or when `getMarkdown` yields null/undefined.
 *
 * @param {object} options
 * @param {string} [options.docsPrefix="/docs"] Path prefix to intercept.
 * @param {(pathname: string, event: object) => (string | Response | Promise<string | Response>)} options.getMarkdown
 *   Produces the markdown for a path. A Response is returned to the client as-is.
 * @param {(info: { path: string, method: string, userAgent: string | null }) => (void | Promise<void>)} [options.onDetection]
 *   Best-effort notification hook; it is never awaited and its failures are ignored.
 * @returns The event handler created by h3's defineEventHandler.
 */
function defineAgentMiddleware(options) {
  return defineEventHandler(async (event) => {
    // event.path still carries the query string; only the path part takes
    // part in prefix matching and is handed to the callbacks.
    const pathname = event.path.split("?")[0];
    const prefix = options.docsPrefix ?? "/docs";
    if (!pathname.startsWith(prefix)) return;

    const { serve, detection } = shouldServeMarkdown({ headers: event.headers });
    if (!serve) return;

    if (options.onDetection) {
      try {
        const method = detection.detected ? detection.method : "accept-header";
        const pending = options.onDetection({
          path: pathname,
          method,
          userAgent: event.headers.get("user-agent"),
        });
        // Deliberately not awaited. Promise.resolve() adopts any thenable, so
        // a rejection from a non-native promise is swallowed as well instead
        // of surfacing as an unhandled rejection.
        Promise.resolve(pending).catch(() => {});
      } catch {
        // The hook is best-effort: a synchronous throw must not break the response.
      }
    }

    const content = await options.getMarkdown(pathname, event);
    // No markdown for this path: fall through to the next handler rather than
    // answering with an empty 200 text/markdown body.
    if (content == null) return;
    if (content instanceof Response) return content;

    return new Response(content, {
      headers: {
        "content-type": "text/markdown; charset=utf-8",
        // Every request header that took part in choosing this representation
        // is listed, so a shared cache cannot replay the markdown to a client
        // that would have received the regular page.
        vary: "Accept, User-Agent, Signature-Agent, Sec-Fetch-Mode",
      },
    });
  });
}
142
+ export {
143
+ defineAgentMiddleware
144
+ };
145
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/nuxt/index.ts","../../src/patterns.ts","../../src/detection.ts","../../src/negotiation.ts"],"sourcesContent":["import type { H3Event } from \"h3\";\nimport { defineEventHandler } from \"h3\";\nimport { shouldServeMarkdown } from \"../negotiation\";\nimport type { OnDetectionCallback } from \"../types\";\n\nexport interface NuxtAgentReadabilityOptions {\n\t/** URL prefix to intercept. Default: '/docs' */\n\tdocsPrefix?: string;\n\t/** Returns markdown content for the given path. Can return a string or a full Response. */\n\tgetMarkdown: (pathname: string, event: H3Event) => string | Response | Promise<string | Response>;\n\t/** Fire-and-forget callback for analytics/logging. */\n\tonDetection?: OnDetectionCallback;\n}\n\n/**\n * Nuxt server middleware that detects AI agents and serves markdown.\n *\n * ```ts\n * // server/middleware/agent.ts\n * import { defineAgentMiddleware } from '@vercel/agent-readability/nuxt'\n *\n * export default defineAgentMiddleware({\n * getMarkdown: async (pathname) => {\n * const doc = await fetchDoc(pathname)\n * return doc.markdown\n * },\n * })\n * ```\n */\nexport function defineAgentMiddleware(options: NuxtAgentReadabilityOptions) {\n\treturn defineEventHandler(async (event: H3Event) => {\n\t\t// h3 event.path includes query string — strip it for prefix matching\n\t\tconst pathname = event.path.split(\"?\")[0];\n\t\tconst prefix = options.docsPrefix ?? \"/docs\";\n\n\t\tif (!pathname.startsWith(prefix)) return; // undefined = h3 cascades to next handler\n\n\t\tconst { serve, detection } = shouldServeMarkdown({ headers: event.headers });\n\t\tif (!serve) return;\n\n\t\t// Fire-and-forget — never block the response\n\t\tif (options.onDetection) {\n\t\t\ttry {\n\t\t\t\tconst method = detection.detected ? 
detection.method : \"accept-header\";\n\t\t\t\tconst p = options.onDetection({\n\t\t\t\t\tpath: pathname,\n\t\t\t\t\tmethod,\n\t\t\t\t\tuserAgent: event.headers.get(\"user-agent\"),\n\t\t\t\t});\n\t\t\t\tif (p instanceof Promise) p.catch(() => {});\n\t\t\t} catch {\n\t\t\t\t/* swallow sync errors */\n\t\t\t}\n\t\t}\n\n\t\tconst content = await options.getMarkdown(pathname, event);\n\t\tif (content instanceof Response) return content;\n\n\t\treturn new Response(content, {\n\t\t\theaders: { \"content-type\": \"text/markdown; charset=utf-8\", vary: \"Accept\" },\n\t\t});\n\t});\n}\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. 
Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n"],"mappings":";AACA,SAAS,0BAA0B;;;ACI5B,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAAI,CAAC,gBAAgB,WAAW,eAAe,KA
AK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AHhBO,SAAS,sBAAsB,SAAsC;AAC3E,SAAO,mBAAmB,OAAO,UAAmB;AAEnD,UAAM,WAAW,MAAM,KAAK,MAAM,GAAG,EAAE,CAAC;AACxC,UAAM,SAAS,QAAQ,cAAc;AAErC,QAAI,CAAC,SAAS,WAAW,MAAM,EAAG;AAElC,UAAM,EAAE,OAAO,UAAU,IAAI,oBAAoB,EAAE,SAAS,MAAM,QAAQ,CAAC;AAC3E,QAAI,CAAC,MAAO;AAGZ,QAAI,QAAQ,aAAa;AACxB,UAAI;AACH,cAAM,SAAS,UAAU,WAAW,UAAU,SAAS;AACvD,cAAM,IAAI,QAAQ,YAAY;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,UACA,WAAW,MAAM,QAAQ,IAAI,YAAY;AAAA,QAC1C,CAAC;AACD,YAAI,aAAa,QAAS,GAAE,MAAM,MAAM;AAAA,QAAC,CAAC;AAAA,MAC3C,QAAQ;AAAA,MAER;AAAA,IACD;AAEA,UAAM,UAAU,MAAM,QAAQ,YAAY,UAAU,KAAK;AACzD,QAAI,mBAAmB,SAAU,QAAO;AAExC,WAAO,IAAI,SAAS,SAAS;AAAA,MAC5B,SAAS,EAAE,gBAAgB,gCAAgC,MAAM,SAAS;AAAA,IAC3E,CAAC;AAAA,EACF,CAAC;AACF;","names":[]}
@@ -0,0 +1,178 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty; // short aliases for the Object reflection helpers used by the shim functions below
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => { // installs every entry of `all` on `target` as an enumerable getter
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true }); // all[name] itself is the getter, so the export is read lazily
9
+ };
10
+ var __copyProps = (to, from, except, desc) => { // copies own properties of `from` onto `to` as getters and returns `to`
11
+ if (from && typeof from === "object" || typeof from === "function") { // anything else (null, primitives) is ignored
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except) // never overwrite a key `to` already owns; skip the `except` key
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); // getter reads from[key] on each access; enumerable mirrors the source descriptor (true when none is found)
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // fresh object flagged __esModule: true that re-exposes the properties of `mod`
19
+
20
+ // src/sveltekit/index.ts
21
+ var sveltekit_exports = {}; // export table for this entry point, filled in by __export on the next statement
22
+ __export(sveltekit_exports, {
23
+ handleAgentReadability: () => handleAgentReadability // arrow defers the lookup; the function declaration further down is hoisted
24
+ });
25
+ module.exports = __toCommonJS(sveltekit_exports); // the CommonJS export is the __esModule-flagged copy of the table
26
+
27
+ // src/patterns.ts
28
+ var AI_AGENT_UA_PATTERNS = [ // lowercase substrings; isAIAgent reports "ua-match" when the lowercased User-Agent contains any of them
29
+ // Anthropic — https://support.claude.com/en/articles/8896518
30
+ "claudebot",
31
+ "claude-searchbot",
32
+ "claude-user",
33
+ "anthropic-ai",
34
+ "claude-web",
35
+ // OpenAI — https://platform.openai.com/docs/bots
36
+ "chatgpt",
37
+ "gptbot",
38
+ "oai-searchbot",
39
+ "openai",
40
+ // Google AI
41
+ "gemini",
42
+ "bard", // NOTE(review): matched as a bare substring, so any User-Agent containing "bard" hits — confirm this is intended
43
+ "google-cloudvertexbot",
44
+ "google-extended",
45
+ // Meta
46
+ "meta-externalagent",
47
+ "meta-externalfetcher",
48
+ "meta-webindexer",
49
+ // Search/Research AI
50
+ "perplexity",
51
+ "youbot",
52
+ "you.com",
53
+ "deepseekbot",
54
+ // Coding assistants
55
+ "cursor",
56
+ "github-copilot",
57
+ "codeium",
58
+ "tabnine",
59
+ "sourcegraph",
60
+ // Other AI agents / data scrapers
61
+ "cohere-ai",
62
+ "bytespider",
63
+ "amazonbot",
64
+ "ai2bot",
65
+ "diffbot",
66
+ "omgili",
67
+ "omgilibot" // already covered by the "omgili" substring above; listed explicitly anyway
68
+ ];
69
+ var SIGNATURE_AGENT_DOMAINS = ["chatgpt.com"]; // lowercase substrings looked for in the lowercased Signature-Agent header by isAIAgent
70
+ var TRADITIONAL_BOT_PATTERNS = [ // exclusion list: a User-Agent containing any of these lowercase substrings never triggers the "heuristic" result in isAIAgent
71
+ "googlebot",
72
+ "bingbot",
73
+ "yandexbot",
74
+ "baiduspider",
75
+ "duckduckbot",
76
+ "slurp",
77
+ "msnbot",
78
+ "facebot",
79
+ "twitterbot",
80
+ "linkedinbot",
81
+ "whatsapp",
82
+ "telegrambot",
83
+ "pingdom",
84
+ "uptimerobot",
85
+ "newrelic",
86
+ "datadog",
87
+ "statuspage",
88
+ "site24x7",
89
+ "applebot"
90
+ ];
91
+ var BOT_LIKE_REGEX = /bot|agent|fetch|crawl|spider|search/i; // unanchored keyword match used only by the heuristic branch of isAIAgent; no `g` flag, so .test() keeps no state between calls
92
+
93
+ // src/detection.ts
94
/**
 * Classifies a request as coming from an AI agent by inspecting its headers.
 *
 * Three checks run in order and the first hit decides the reported `method`:
 *   1. "ua-match"        - the User-Agent contains a known AI agent substring.
 *   2. "signature-agent" - the Signature-Agent header contains a known AI domain.
 *   3. "heuristic"       - there is no sec-fetch-mode header, the User-Agent
 *                          looks bot-like, and it is not a traditional bot.
 *
 * @param {{ headers: { get(name: string): string | null | undefined } }} request
 *   Anything exposing a `headers.get(name)` lookup.
 * @returns {{ detected: boolean, method: "ua-match" | "signature-agent" | "heuristic" | null }}
 *   `method` is null exactly when `detected` is false.
 */
function isAIAgent(request) {
  const ua = (request.headers.get("user-agent") ?? "").toLowerCase();
  const uaContains = (needle) => ua.includes(needle);

  // 1. Definitive: known AI user-agent substring.
  if (ua !== "" && AI_AGENT_UA_PATTERNS.some(uaContains)) {
    return { detected: true, method: "ua-match" };
  }

  // 2. Definitive: Signature-Agent header naming a known AI service.
  const signature = request.headers.get("signature-agent");
  if (signature) {
    const sig = signature.toLowerCase();
    for (const domain of SIGNATURE_AGENT_DOMAINS) {
      if (sig.includes(domain)) {
        return { detected: true, method: "signature-agent" };
      }
    }
  }

  // 3. Heuristic: a bot-like User-Agent that sent no sec-fetch-mode header and
  //    is not on the traditional-bot exclusion list.
  const sentFetchMode = Boolean(request.headers.get("sec-fetch-mode"));
  const looksLikeBot = !sentFetchMode && ua !== "" && BOT_LIKE_REGEX.test(ua);
  if (looksLikeBot && !TRADITIONAL_BOT_PATTERNS.some(uaContains)) {
    return { detected: true, method: "heuristic" };
  }

  return { detected: false, method: null };
}
116
+
117
+ // src/negotiation.ts
118
// Media types treated as "markdown" when the caller supplies no list of its own.
const DEFAULT_MARKDOWN_TYPES = ["text/markdown", "text/x-markdown"];

/**
 * Reports whether the request's Accept header asks for a markdown media type.
 *
 * The header is split into its comma-separated media ranges and each range's
 * type is compared, case-insensitively and as a whole token, against the
 * wanted types. A range carrying `q=0` is an explicit "not acceptable" and is
 * ignored. Wildcard ranges such as `*` + `/*` never count as a markdown request.
 *
 * @param {{ headers: { get(name: string): string | null | undefined } }} request
 *   Anything exposing a `headers.get(name)` lookup.
 * @param {{ mediaTypes?: string[] }} [options]
 *   `mediaTypes` replaces the default list; entries are full media types
 *   (e.g. "text/markdown") and may be written in any letter case.
 * @returns {boolean} true when at least one acceptable range names a wanted type.
 */
function acceptsMarkdown(request, options) {
  const accept = request.headers.get("accept");
  if (!accept) return false;

  const wanted = new Set(
    (options?.mediaTypes ?? DEFAULT_MARKDOWN_TYPES).map((type) => String(type).trim().toLowerCase()),
  );

  return accept.split(",").some((range) => {
    const [mediaType, ...params] = range.split(";").map((part) => part.trim().toLowerCase());
    if (!wanted.has(mediaType)) return false;

    // An unparsable or missing weight keeps the default of 1; only an explicit
    // zero weight removes the type from the acceptable set.
    const weight = params.find((param) => param.startsWith("q="));
    return weight === undefined || Number.parseFloat(weight.slice(2)) !== 0;
  });
}
126
/**
 * Decides in one call whether a request should receive markdown, and why.
 *
 * Agent detection takes precedence: the Accept header is consulted only when
 * `isAIAgent` does not flag the request.
 *
 * @param {{ headers: { get(name: string): string | null | undefined } }} request
 * @param {{ mediaTypes?: string[] }} [options] Forwarded to `acceptsMarkdown`.
 * @returns {{ serve: boolean, reason: "agent" | "accept-header" | null, detection: object }}
 *   `detection` is always the raw `isAIAgent` result, whichever reason applied.
 */
function shouldServeMarkdown(request, options) {
  const detection = isAIAgent(request);

  let reason = null;
  if (detection.detected) {
    reason = "agent";
  } else if (acceptsMarkdown(request, options)) {
    reason = "accept-header";
  }

  return { serve: reason !== null, reason, detection };
}
136
+
137
+ // src/sveltekit/index.ts
138
/**
 * Creates a SvelteKit `handle` hook that answers AI agents (and clients whose
 * Accept header asks for markdown) with the markdown variant of a docs page.
 *
 * The hook hands the request straight to `resolve(event)` when:
 *   - the method is neither GET nor HEAD (writes must reach their own handler),
 *   - the path does not start with `options.docsPrefix` (default "/docs"),
 *   - it is a SvelteKit data request or a sub-request (the latter prevents the
 *     internal markdown fetch from re-entering this hook), or
 *   - `shouldServeMarkdown` declines it.
 * Otherwise it fetches `options.rewrite(pathname)` through `event.fetch` and
 * relays that response with `Accept` added to `Vary`. If the markdown route
 * answers with a non-OK status, the normal page is rendered instead.
 *
 * @param {object} options
 * @param {string} [options.docsPrefix] URL prefix to intercept. Default "/docs".
 * @param {(pathname: string) => string} options.rewrite
 *   Maps the request path to the internal route that returns markdown.
 * @param {(info: { path: string, method: string, userAgent: string | null }) => unknown} [options.onDetection]
 *   Fire-and-forget callback; anything it throws or rejects with is ignored.
 * @returns {(input: { event: object, resolve: Function }) => Promise<Response>}
 */
function handleAgentReadability(options) {
  return async ({ event, resolve }) => {
    const { pathname } = event.url;
    const prefix = options.docsPrefix ?? "/docs";

    // Only safe, body-less reads may be swapped for a markdown GET; a POST or
    // DELETE under the prefix has to reach the route it was addressed to.
    const httpMethod = String(event.request.method ?? "GET").toUpperCase();
    const isRead = httpMethod === "GET" || httpMethod === "HEAD";

    if (!isRead || !pathname.startsWith(prefix) || event.isDataRequest || event.isSubRequest) {
      return resolve(event);
    }

    const { serve, detection } = shouldServeMarkdown(event.request);
    if (!serve) return resolve(event);

    if (options.onDetection) {
      try {
        const method = detection.detected ? detection.method : "accept-header";
        const pending = options.onDetection({
          path: pathname,
          method,
          userAgent: event.request.headers.get("user-agent"),
        });
        // Promise.resolve adopts any thenable, not just native promises, so a
        // rejection from the callback can never surface as unhandled.
        Promise.resolve(pending).catch(() => {});
      } catch {
        // Deliberately ignored: analytics must never break or delay the response.
      }
    }

    const response = await event.fetch(options.rewrite(pathname));
    if (!response.ok) return resolve(event);

    // Compare whole Vary tokens (not substrings) before adding Accept.
    const headers = new Headers(response.headers);
    const varyTokens = (headers.get("vary") ?? "").toLowerCase().split(/\s*,\s*/);
    if (!varyTokens.includes("accept")) {
      headers.append("vary", "Accept");
    }

    return new Response(response.body, {
      status: response.status,
      statusText: response.statusText,
      headers,
    });
  };
}
174
+ // Annotate the CommonJS export names for ESM import in node:
175
+ 0 && (module.exports = { // never executed: `0 &&` short-circuits, so this literal only spells out the export name
176
+ handleAgentReadability
177
+ });
178
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/sveltekit/index.ts","../../src/patterns.ts","../../src/detection.ts","../../src/negotiation.ts"],"sourcesContent":["import type { Handle } from \"@sveltejs/kit\";\nimport { shouldServeMarkdown } from \"../negotiation\";\nimport type { OnDetectionCallback } from \"../types\";\n\nexport interface SvelteKitAgentReadabilityOptions {\n\t/** URL prefix to intercept. Default: '/docs' */\n\tdocsPrefix?: string;\n\t/** Maps request path to internal +server.ts route returning markdown */\n\trewrite: (pathname: string) => string;\n\t/** Fire-and-forget callback for analytics/logging. */\n\tonDetection?: OnDetectionCallback;\n}\n\n/**\n * SvelteKit handle hook that detects AI agents and rewrites to markdown routes.\n *\n * ```ts\n * // hooks.server.ts\n * import { handleAgentReadability } from '@vercel/agent-readability/sveltekit'\n * import { sequence } from '@sveltejs/kit/hooks'\n *\n * export const handle = sequence(\n * handleAgentReadability({ rewrite: (p) => `/api/docs-md${p}` }),\n * // other handles...\n * )\n * ```\n */\nexport function handleAgentReadability(options: SvelteKitAgentReadabilityOptions): Handle {\n\treturn async ({ event, resolve }) => {\n\t\tconst { pathname } = event.url;\n\t\tconst prefix = options.docsPrefix ?? \"/docs\";\n\n\t\t// Skip: outside prefix, data requests (__data.json), sub-requests (prevent loop)\n\t\tif (!pathname.startsWith(prefix) || event.isDataRequest || event.isSubRequest) {\n\t\t\treturn resolve(event);\n\t\t}\n\n\t\tconst { serve, detection } = shouldServeMarkdown(event.request);\n\t\tif (!serve) return resolve(event);\n\n\t\t// Fire-and-forget — never block the response\n\t\tif (options.onDetection) {\n\t\t\ttry {\n\t\t\t\tconst method = detection.detected ? 
detection.method : \"accept-header\";\n\t\t\t\tconst p = options.onDetection({\n\t\t\t\t\tpath: pathname,\n\t\t\t\t\tmethod,\n\t\t\t\t\tuserAgent: event.request.headers.get(\"user-agent\"),\n\t\t\t\t});\n\t\t\t\tif (p instanceof Promise) p.catch(() => {});\n\t\t\t} catch {\n\t\t\t\t/* swallow sync errors */\n\t\t\t}\n\t\t}\n\n\t\t// Internal fetch — zero network hop via SvelteKit's event.fetch\n\t\tconst response = await event.fetch(options.rewrite(pathname));\n\t\tif (!response.ok) return resolve(event);\n\n\t\t// Ensure Vary: Accept for correct CDN caching (token match, not substring)\n\t\tconst headers = new Headers(response.headers);\n\t\tconst varyTokens = (headers.get(\"vary\") ?? \"\").toLowerCase().split(/\\s*,\\s*/);\n\t\tif (!varyTokens.includes(\"accept\")) {\n\t\t\theaders.append(\"vary\", \"Accept\");\n\t\t}\n\n\t\treturn new Response(response.body, {\n\t\t\tstatus: response.status,\n\t\t\tstatusText: response.statusText,\n\t\t\theaders,\n\t\t});\n\t};\n}\n","/**\n * Layer 1: Known AI agent UA substrings (lowercase).\n * Curated from https://bots.fyi/?tags=ai_assistant + official vendor docs.\n * Last reviewed: 2026-03-20\n */\nexport const AI_AGENT_UA_PATTERNS: readonly string[] = [\n\t// Anthropic — https://support.claude.com/en/articles/8896518\n\t\"claudebot\",\n\t\"claude-searchbot\",\n\t\"claude-user\",\n\t\"anthropic-ai\",\n\t\"claude-web\",\n\n\t// OpenAI — https://platform.openai.com/docs/bots\n\t\"chatgpt\",\n\t\"gptbot\",\n\t\"oai-searchbot\",\n\t\"openai\",\n\n\t// Google AI\n\t\"gemini\",\n\t\"bard\",\n\t\"google-cloudvertexbot\",\n\t\"google-extended\",\n\n\t// Meta\n\t\"meta-externalagent\",\n\t\"meta-externalfetcher\",\n\t\"meta-webindexer\",\n\n\t// Search/Research AI\n\t\"perplexity\",\n\t\"youbot\",\n\t\"you.com\",\n\t\"deepseekbot\",\n\n\t// Coding assistants\n\t\"cursor\",\n\t\"github-copilot\",\n\t\"codeium\",\n\t\"tabnine\",\n\t\"sourcegraph\",\n\n\t// Other AI agents / data 
scrapers\n\t\"cohere-ai\",\n\t\"bytespider\",\n\t\"amazonbot\",\n\t\"ai2bot\",\n\t\"diffbot\",\n\t\"omgili\",\n\t\"omgilibot\",\n];\n\n/**\n * Layer 2: Known AI service URLs in Signature-Agent header (RFC 9421).\n */\nexport const SIGNATURE_AGENT_DOMAINS: readonly string[] = [\"chatgpt.com\"];\n\n/**\n * Layer 3: Traditional bot exclusion list. Bots that should NOT trigger the\n * heuristic layer (search engine crawlers, social previews, monitoring tools).\n */\nexport const TRADITIONAL_BOT_PATTERNS: readonly string[] = [\n\t\"googlebot\",\n\t\"bingbot\",\n\t\"yandexbot\",\n\t\"baiduspider\",\n\t\"duckduckbot\",\n\t\"slurp\",\n\t\"msnbot\",\n\t\"facebot\",\n\t\"twitterbot\",\n\t\"linkedinbot\",\n\t\"whatsapp\",\n\t\"telegrambot\",\n\t\"pingdom\",\n\t\"uptimerobot\",\n\t\"newrelic\",\n\t\"datadog\",\n\t\"statuspage\",\n\t\"site24x7\",\n\t\"applebot\",\n];\n\n/**\n * Broad regex for bot-like UA strings (used only in Layer 3 heuristic).\n * No word boundaries — keywords commonly appear in compound names.\n */\nexport const BOT_LIKE_REGEX: RegExp = /bot|agent|fetch|crawl|spider|search/i;\n","import {\n\tAI_AGENT_UA_PATTERNS,\n\tBOT_LIKE_REGEX,\n\tSIGNATURE_AGENT_DOMAINS,\n\tTRADITIONAL_BOT_PATTERNS,\n} from \"./patterns\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\n/**\n * Detects AI agents from HTTP request headers.\n *\n * Three detection layers (checked in order):\n * 1. Known UA patterns (definitive)\n * 2. Signature-Agent header (definitive, RFC 9421)\n * 3. Missing sec-fetch-mode heuristic (catches unknown bots)\n *\n * Optimizes for recall over precision: serving markdown to a non-AI bot\n * is low-harm; missing an AI agent means a worse experience.\n */\nexport function isAIAgent(request: MinimalRequest): DetectionResult {\n\tconst userAgent = request.headers.get(\"user-agent\");\n\tconst lowerUA = userAgent?.toLowerCase() ?? 
\"\";\n\n\t// Layer 1: Known UA pattern match\n\tif (lowerUA && AI_AGENT_UA_PATTERNS.some((pattern) => lowerUA.includes(pattern))) {\n\t\treturn { detected: true, method: \"ua-match\" };\n\t}\n\n\t// Layer 2: Signature-Agent header (RFC 9421, used by ChatGPT agent)\n\tconst signatureAgent = request.headers.get(\"signature-agent\");\n\tif (signatureAgent) {\n\t\tconst lowerSig = signatureAgent.toLowerCase();\n\t\tif (SIGNATURE_AGENT_DOMAINS.some((domain) => lowerSig.includes(domain))) {\n\t\t\treturn { detected: true, method: \"signature-agent\" };\n\t\t}\n\t}\n\n\t// Layer 3: Missing browser fingerprint heuristic\n\t// Real browsers (Chrome 76+, Firefox 90+, Safari 16.4+) send sec-fetch-mode\n\t// on navigation requests. Its absence signals a programmatic client.\n\tconst secFetchMode = request.headers.get(\"sec-fetch-mode\");\n\tif (!secFetchMode && lowerUA && BOT_LIKE_REGEX.test(lowerUA)) {\n\t\tconst isTraditionalBot = TRADITIONAL_BOT_PATTERNS.some((pattern) => lowerUA.includes(pattern));\n\t\tif (!isTraditionalBot) {\n\t\t\treturn { detected: true, method: \"heuristic\" };\n\t\t}\n\t}\n\n\treturn { detected: false, method: null };\n}\n","import { isAIAgent } from \"./detection\";\nimport type { DetectionResult, MinimalRequest } from \"./types\";\n\nconst DEFAULT_MARKDOWN_TYPES = [\"text/markdown\", \"text/x-markdown\"];\n\nexport interface AcceptMarkdownOptions {\n\tmediaTypes?: string[];\n}\n\n/**\n * Check if the request prefers markdown via the Accept header.\n */\nexport function acceptsMarkdown(request: MinimalRequest, options?: AcceptMarkdownOptions): boolean {\n\tconst accept = request.headers.get(\"accept\");\n\tif (!accept) return false;\n\n\tconst types = options?.mediaTypes ?? 
DEFAULT_MARKDOWN_TYPES;\n\tconst lowerAccept = accept.toLowerCase();\n\treturn types.some((type) => lowerAccept.includes(type));\n}\n\nexport interface ShouldServeMarkdownResult {\n\tserve: boolean;\n\treason: \"agent\" | \"accept-header\" | null;\n\tdetection: DetectionResult;\n}\n\n/**\n * Combines agent detection and content negotiation into one call.\n * Returns whether to serve markdown and why.\n */\nexport function shouldServeMarkdown(\n\trequest: MinimalRequest,\n\toptions?: AcceptMarkdownOptions,\n): ShouldServeMarkdownResult {\n\tconst detection = isAIAgent(request);\n\tif (detection.detected) {\n\t\treturn { serve: true, reason: \"agent\", detection };\n\t}\n\n\tif (acceptsMarkdown(request, options)) {\n\t\treturn { serve: true, reason: \"accept-header\", detection };\n\t}\n\n\treturn { serve: false, reason: null, detection };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKO,IAAM,uBAA0C;AAAA;AAAA,EAEtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAKO,IAAM,0BAA6C,CAAC,aAAa;AAMjE,IAAM,2BAA8C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACD;AAMO,IAAM,iBAAyB;;;ACrE/B,SAAS,UAAU,SAA0C;AACnE,QAAM,YAAY,QAAQ,QAAQ,IAAI,YAAY;AAClD,QAAM,UAAU,WAAW,YAAY,KAAK;AAG5C,MAAI,WAAW,qBAAqB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC,GAAG;AACjF,WAAO,EAAE,UAAU,MAAM,QAAQ,WAAW;AAAA,EAC7C;AAGA,QAAM,iBAAiB,QAAQ,QAAQ,IAAI,iBAAiB;AAC5D,MAAI,gBAAgB;AACnB,UAAM,WAAW,eAAe,YAAY;AAC5C,QAAI,wBAAwB,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC,GAAG;AACxE,aAAO,EAAE,UAAU,MAAM,QAAQ,kBAAkB;AAAA,IACpD;AAAA,EACD;AAKA,QAAM,eAAe,QAAQ,QAAQ,IAAI,gBAAgB;AACzD,MAA
I,CAAC,gBAAgB,WAAW,eAAe,KAAK,OAAO,GAAG;AAC7D,UAAM,mBAAmB,yBAAyB,KAAK,CAAC,YAAY,QAAQ,SAAS,OAAO,CAAC;AAC7F,QAAI,CAAC,kBAAkB;AACtB,aAAO,EAAE,UAAU,MAAM,QAAQ,YAAY;AAAA,IAC9C;AAAA,EACD;AAEA,SAAO,EAAE,UAAU,OAAO,QAAQ,KAAK;AACxC;;;AC9CA,IAAM,yBAAyB,CAAC,iBAAiB,iBAAiB;AAS3D,SAAS,gBAAgB,SAAyB,SAA0C;AAClG,QAAM,SAAS,QAAQ,QAAQ,IAAI,QAAQ;AAC3C,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAQ,SAAS,cAAc;AACrC,QAAM,cAAc,OAAO,YAAY;AACvC,SAAO,MAAM,KAAK,CAAC,SAAS,YAAY,SAAS,IAAI,CAAC;AACvD;AAYO,SAAS,oBACf,SACA,SAC4B;AAC5B,QAAM,YAAY,UAAU,OAAO;AACnC,MAAI,UAAU,UAAU;AACvB,WAAO,EAAE,OAAO,MAAM,QAAQ,SAAS,UAAU;AAAA,EAClD;AAEA,MAAI,gBAAgB,SAAS,OAAO,GAAG;AACtC,WAAO,EAAE,OAAO,MAAM,QAAQ,iBAAiB,UAAU;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,QAAQ,MAAM,UAAU;AAChD;;;AHlBO,SAAS,uBAAuB,SAAmD;AACzF,SAAO,OAAO,EAAE,OAAO,QAAQ,MAAM;AACpC,UAAM,EAAE,SAAS,IAAI,MAAM;AAC3B,UAAM,SAAS,QAAQ,cAAc;AAGrC,QAAI,CAAC,SAAS,WAAW,MAAM,KAAK,MAAM,iBAAiB,MAAM,cAAc;AAC9E,aAAO,QAAQ,KAAK;AAAA,IACrB;AAEA,UAAM,EAAE,OAAO,UAAU,IAAI,oBAAoB,MAAM,OAAO;AAC9D,QAAI,CAAC,MAAO,QAAO,QAAQ,KAAK;AAGhC,QAAI,QAAQ,aAAa;AACxB,UAAI;AACH,cAAM,SAAS,UAAU,WAAW,UAAU,SAAS;AACvD,cAAM,IAAI,QAAQ,YAAY;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,UACA,WAAW,MAAM,QAAQ,QAAQ,IAAI,YAAY;AAAA,QAClD,CAAC;AACD,YAAI,aAAa,QAAS,GAAE,MAAM,MAAM;AAAA,QAAC,CAAC;AAAA,MAC3C,QAAQ;AAAA,MAER;AAAA,IACD;AAGA,UAAM,WAAW,MAAM,MAAM,MAAM,QAAQ,QAAQ,QAAQ,CAAC;AAC5D,QAAI,CAAC,SAAS,GAAI,QAAO,QAAQ,KAAK;AAGtC,UAAM,UAAU,IAAI,QAAQ,SAAS,OAAO;AAC5C,UAAM,cAAc,QAAQ,IAAI,MAAM,KAAK,IAAI,YAAY,EAAE,MAAM,SAAS;AAC5E,QAAI,CAAC,WAAW,SAAS,QAAQ,GAAG;AACnC,cAAQ,OAAO,QAAQ,QAAQ;AAAA,IAChC;AAEA,WAAO,IAAI,SAAS,SAAS,MAAM;AAAA,MAClC,QAAQ,SAAS;AAAA,MACjB,YAAY,SAAS;AAAA,MACrB;AAAA,IACD,CAAC;AAAA,EACF;AACD;","names":[]}