@ontosdk/next 1.4.4 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,8 @@ interface AiBot {
10
10
  name: string;
11
11
  /** The company operating this bot */
12
12
  company: string;
13
+ /** ISO date when this bot was added or last verified */
14
+ addedAt?: string;
13
15
  }
14
16
  /**
15
17
  * Flat list of user-agent substrings for fast matching in the middleware.
@@ -17,7 +19,8 @@ interface AiBot {
17
19
  declare const AI_BOT_USER_AGENTS: string[];
18
20
  /**
19
21
  * Given a raw user-agent string, returns the matched AiBot entry or undefined.
20
- * Comparison is case-insensitive to handle inconsistent agent casing.
22
+ * Uses a "Longest Match" strategy to ensure maximum specificity.
23
+ * Comparison is case-insensitive.
21
24
  */
22
25
  declare function matchBot(userAgent: string | null): AiBot | undefined;
23
26
 
@@ -10,6 +10,8 @@ interface AiBot {
10
10
  name: string;
11
11
  /** The company operating this bot */
12
12
  company: string;
13
+ /** ISO date when this bot was added or last verified */
14
+ addedAt?: string;
13
15
  }
14
16
  /**
15
17
  * Flat list of user-agent substrings for fast matching in the middleware.
@@ -17,7 +19,8 @@ interface AiBot {
17
19
  declare const AI_BOT_USER_AGENTS: string[];
18
20
  /**
19
21
  * Given a raw user-agent string, returns the matched AiBot entry or undefined.
20
- * Comparison is case-insensitive to handle inconsistent agent casing.
22
+ * Uses a "Longest Match" strategy to ensure maximum specificity.
23
+ * Comparison is case-insensitive.
21
24
  */
22
25
  declare function matchBot(userAgent: string | null): AiBot | undefined;
23
26
 
@@ -1,8 +1,4 @@
1
- "use strict";var d=Object.defineProperty;var T=Object.getOwnPropertyDescriptor;var b=Object.getOwnPropertyNames;var k=Object.prototype.hasOwnProperty;var P=(t,e)=>{for(var o in e)d(t,o,{get:e[o],enumerable:!0})},R=(t,e,o,a)=>{if(e&&typeof e=="object"||typeof e=="function")for(let n of b(e))!k.call(t,n)&&n!==o&&d(t,n,{get:()=>e[n],enumerable:!(a=T(e,n))||a.enumerable});return t};var _=t=>R(d({},"__esModule",{value:!0}),t);var U={};P(U,{AI_BOT_USER_AGENTS:()=>f,matchBot:()=>m,ontoMiddleware:()=>I});module.exports=_(U);var p=require("next/server");var g=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"ChatGPT",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"OpenAI",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"Claude",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"Anthropic",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Perplexity",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],f=g.map(t=>t.name);function m(t){if(!t)return;let e=t.toLowerCase();return g.find(o=>e.includes(o.name.toLowerCase()))}function O(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let o of t.routes){let a=`${t.baseUrl}${o.path}`;e.push(`- [${o.path}](${a}): ${o.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let o of t.externalLinks)o.description?e.push(`- [${o.title}](${o.url}): ${o.description}`):e.push(`- [${o.title}](${o.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let o of t.sections)e.push(`## ${o.heading}`),e.push(""),e.push(o.content),e.push("");return e.join(`
1
+ "use strict";var m=Object.defineProperty;var C=Object.getOwnPropertyDescriptor;var T=Object.getOwnPropertyNames;var _=Object.prototype.hasOwnProperty;var $=(t,e)=>{for(var n in e)m(t,n,{get:e[n],enumerable:!0})},b=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let a of T(e))!_.call(t,a)&&a!==n&&m(t,a,{get:()=>e[a],enumerable:!(o=C(e,a))||o.enumerable});return t};var k=t=>b(m({},"__esModule",{value:!0}),t);var U={};$(U,{AI_BOT_USER_AGENTS:()=>y,matchBot:()=>i,ontoMiddleware:()=>R});module.exports=k(U);var d=require("next/server");var A=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"cohere-ai",company:"Cohere",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"}],y=P();function P(){return A.map(t=>t.name)}function i(t){if(!t)return;let e=t.toLowerCase(),n=A.filter(o=>e.includes(o.name.toLowerCase()));if(n.length!==0)return n.length===1?n[0]:n.reduce((o,a)=>a.name.length>o.name.length?a:o)}function g(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let n of t.routes){let o=`${t.baseUrl}${n.path}`;e.push(`- [${n.path}](${o}): ${n.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let n of t.externalLinks)n.description?e.push(`- [${n.title}](${n.url}): ${n.description}`):e.push(`- [${n.title}](${n.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let n of t.sections)e.push(`## ${n.heading}`),e.push(""),e.push(n.content),e.push("");return e.join(`
2
2
  `).trim()+`
3
- `}async function I(t,e){let o=t.headers.get("user-agent"),a=t.nextUrl.clone(),n=m(o),A=t.headers.get("accept")||"",l=t.nextUrl.searchParams.has("onto"),C=!!n,$=A.includes("text/markdown")||l;if(C||$){if(a.pathname.startsWith("/_next"))return p.NextResponse.next();if(a.pathname==="/llms.txt")try{if(e){let r=O(e),i=new p.NextResponse(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}else{a.pathname="/llms.txt";let r=p.NextResponse.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(r){console.error("[Onto] Failed to generate llms.txt:",r),a.pathname="/llms.txt";let i=p.NextResponse.rewrite(a);return i.headers.set("Content-Type","text/plain; charset=utf-8"),n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}if(a.pathname.includes("."))return p.NextResponse.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let h={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(h["X-Onto-Bot"]=`${n.name} (${n.company})`),l&&(h["X-Onto-Debug"]="true");let u=process.env.ONTO_API_KEY,y=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(u){fetch(`${y}/api/track`,{method:"POST",headers:{"x-onto-key":u,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let r=await fetch(`${y}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":u},signal:AbortSignal.timeout(1500)});if(r.ok){let{injection:i}=await r.json();if(i){let B=`${a.origin}/.onto${s}.md`,x=await fetch(B);if(x.ok){let w=`${await x.text()}
4
-
5
- ---
6
-
7
- ${i}`;return new p.NextResponse(w,{headers:{...h,"X-Onto-Injected":"true"}})}}}}catch(r){console.error("[Onto] Injection failed",r)}}a.pathname=`/.onto${s}.md`;let c=p.NextResponse.rewrite(a);return c.headers.set("Content-Type","text/markdown; charset=utf-8"),c.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&c.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),l&&c.headers.set("X-Onto-Debug","true"),c}return p.NextResponse.next()}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
3
+ `}async function R(t,e){let n=t.headers.get("user-agent"),o=t.nextUrl.clone(),a=i(n),x=t.headers.get("accept")||"",l=t.nextUrl.searchParams.has("onto"),f=!!a,O=x.includes("text/markdown")||l;if(f||O){if(o.pathname.startsWith("/_next"))return d.NextResponse.next();if(o.pathname==="/llms.txt")try{if(e){let p=g(e),c=new d.NextResponse(p,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return a&&c.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),c.headers.set("X-Onto-Trace",n||"no-ua"),c}}catch(p){console.error("[Onto] Failed to generate llms.txt:",p)}if(o.pathname.includes("."))return d.NextResponse.next();let r=o.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let u=process.env.ONTO_API_KEY,B=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";u&&fetch(`${B}/api/track`,{method:"POST",headers:{"x-onto-key":u,"Content-Type":"application/json"},body:JSON.stringify({route:o.pathname,userAgent:n,bot:a?.name,company:a?.company})}).catch(()=>{}),o.pathname=`/.onto${r}.md`;let s=d.NextResponse.rewrite(o);return s.headers.set("Content-Type","text/markdown; charset=utf-8"),s.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),s.headers.set("X-Onto-Trace",n||"no-ua"),a&&s.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),l&&s.headers.set("X-Onto-Debug","true"),s}let h=d.NextResponse.next();return h.headers.set("X-Onto-Trace",n||"no-ua"),h}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
8
4
  //# sourceMappingURL=middleware.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n if (hasDebugParam) {\r\n botHeaders['X-Onto-Debug'] = 'true';\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Explicitly set headers on the rewrite response\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n if (hasDebugParam) {\r\n response.headers.set('X-Onto-Debug', 'true');\r\n }\r\n\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'ChatGPT', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n { name: 'OpenAI', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'Claude', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n { name: 'Anthropic', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n { name: 'Perplexity', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,UAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,SAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,YAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,aAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCeO,SAASI,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAGhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAK7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CAER,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIN,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtEQ,CACX,KAAO,CAEHT,EAAI,SAAW,YACf,IAAMS,EAAW,eAAa,QAAQT,CAAG,EACzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEQ,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DV,EAAI,SAAW,YACf,IAAMS,EAAW,eAAa,QAAQT,CAAG,EACzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEQ,CACX,CAIJ,GAAIT,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIW,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIX,IACAW,EAAW,YAAY,EAAI,GAAGX,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAE9DG,IACAQ,EAAW,cAAc,EAAI,QAIjC,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOb,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMc,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBd,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAca,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGjB,EAAI,MAAM,SAASW,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAI,eAAaG,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGApB,EAAI,SAAW,SAASW,CAAW,MAGnC,IAAMF,EAAW,eAAa,QAAQT,CAAG,EAGzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EAErGR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEzEG,GACAK,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAGxCA,CACX,CAEA,OAAO,eAAa,KAAK,CAC7B","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
1
+ {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, -1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'cohere-ai', company: 'Cohere', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,YAAiB,QAAS,SAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,CAC5E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCJO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAI7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIN,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFQ,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIW,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOZ,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASW,CAAW,MACnC,IAAMF,EAAW,eAAa,QAAQT,CAAG,EAGzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EACrDE,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAClFG,GAAeK,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAW,eAAa,KAAK,EACnC,OAAAA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
@@ -1,8 +1,4 @@
1
- import{NextResponse as p}from"next/server";var x=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"ChatGPT",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"OpenAI",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"Claude",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"Anthropic",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Perplexity",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],B=x.map(t=>t.name);function u(t){if(!t)return;let e=t.toLowerCase();return x.find(o=>e.includes(o.name.toLowerCase()))}function g(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let o of t.routes){let a=`${t.baseUrl}${o.path}`;e.push(`- [${o.path}](${a}): ${o.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let o of t.externalLinks)o.description?e.push(`- [${o.title}](${o.url}): ${o.description}`):e.push(`- [${o.title}](${o.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let o of t.sections)e.push(`## ${o.heading}`),e.push(""),e.push(o.content),e.push("");return e.join(`
1
+ import{NextResponse as d}from"next/server";var u=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"cohere-ai",company:"Cohere",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"}],O=B();function B(){return u.map(n=>n.name)}function c(n){if(!n)return;let t=n.toLowerCase(),e=u.filter(a=>t.includes(a.name.toLowerCase()));if(e.length!==0)return e.length===1?e[0]:e.reduce((a,o)=>o.name.length>a.name.length?o:a)}function A(n){let t=[];if(t.push(`# ${n.name}`),t.push(""),t.push(`> ${n.summary}`),t.push(""),n.routes&&n.routes.length>0){t.push("## Key Routes"),t.push("");for(let e of n.routes){let a=`${n.baseUrl}${e.path}`;t.push(`- [${e.path}](${a}): ${e.description}`)}t.push("")}if(n.externalLinks&&n.externalLinks.length>0){t.push("## Resources"),t.push("");for(let e of n.externalLinks)e.description?t.push(`- [${e.title}](${e.url}): ${e.description}`):t.push(`- [${e.title}](${e.url})`);t.push("")}if(n.sections&&n.sections.length>0)for(let e of n.sections)t.push(`## ${e.heading}`),t.push(""),t.push(e.content),t.push("");return t.join(`
2
2
  `).trim()+`
3
- `}async function U(t,e){let o=t.headers.get("user-agent"),a=t.nextUrl.clone(),n=u(o),f=t.headers.get("accept")||"",m=t.nextUrl.searchParams.has("onto"),O=!!n,A=f.includes("text/markdown")||m;if(O||A){if(a.pathname.startsWith("/_next"))return p.next();if(a.pathname==="/llms.txt")try{if(e){let r=g(e),i=new p(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}else{a.pathname="/llms.txt";let r=p.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(r){console.error("[Onto] Failed to generate llms.txt:",r),a.pathname="/llms.txt";let i=p.rewrite(a);return i.headers.set("Content-Type","text/plain; charset=utf-8"),n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}if(a.pathname.includes("."))return p.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let l={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(l["X-Onto-Bot"]=`${n.name} (${n.company})`),m&&(l["X-Onto-Debug"]="true");let h=process.env.ONTO_API_KEY,d=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(h){fetch(`${d}/api/track`,{method:"POST",headers:{"x-onto-key":h,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let r=await fetch(`${d}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":h},signal:AbortSignal.timeout(1500)});if(r.ok){let{injection:i}=await r.json();if(i){let C=`${a.origin}/.onto${s}.md`,y=await fetch(C);if(y.ok){let $=`${await y.text()}
4
-
5
- ---
6
-
7
- ${i}`;return new p($,{headers:{...l,"X-Onto-Injected":"true"}})}}}}catch(r){console.error("[Onto] Injection failed",r)}}a.pathname=`/.onto${s}.md`;let c=p.rewrite(a);return c.headers.set("Content-Type","text/markdown; charset=utf-8"),c.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&c.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),m&&c.headers.set("X-Onto-Debug","true"),c}return p.next()}export{B as AI_BOT_USER_AGENTS,u as matchBot,U as ontoMiddleware};
3
+ `}async function R(n,t){let e=n.headers.get("user-agent"),a=n.nextUrl.clone(),o=c(e),y=n.headers.get("accept")||"",m=n.nextUrl.searchParams.has("onto"),g=!!o,x=y.includes("text/markdown")||m;if(g||x){if(a.pathname.startsWith("/_next"))return d.next();if(a.pathname==="/llms.txt")try{if(t){let i=A(t),p=new d(i,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return o&&p.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),p.headers.set("X-Onto-Trace",e||"no-ua"),p}}catch(i){console.error("[Onto] Failed to generate llms.txt:",i)}if(a.pathname.includes("."))return d.next();let r=a.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let h=process.env.ONTO_API_KEY,f=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";h&&fetch(`${f}/api/track`,{method:"POST",headers:{"x-onto-key":h,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:e,bot:o?.name,company:o?.company})}).catch(()=>{}),a.pathname=`/.onto${r}.md`;let s=d.rewrite(a);return s.headers.set("Content-Type","text/markdown; charset=utf-8"),s.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),s.headers.set("X-Onto-Trace",e||"no-ua"),o&&s.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),m&&s.headers.set("X-Onto-Debug","true"),s}let l=d.next();return l.headers.set("X-Onto-Trace",e||"no-ua"),l}export{O as AI_BOT_USER_AGENTS,c as matchBot,R as ontoMiddleware};
8
4
  //# sourceMappingURL=middleware.mjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n if (hasDebugParam) {\r\n botHeaders['X-Onto-Debug'] = 'true';\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Explicitly set headers on the rewrite response\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n if (hasDebugParam) {\r\n response.headers.set('X-Onto-Debug', 'true');\r\n }\r\n\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'ChatGPT', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n { name: 'OpenAI', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'Claude', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n { name: 'Anthropic', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n { name: 'Perplexity', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,UAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,SAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,YAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,aAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCeO,SAASI,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAGhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOO,EAAa,KAAK,EAK7B,GAAIP,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CAER,IAAMU,EAAiBC,EAAgBX,CAAM,EACvCY,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIP,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtES,CACX,KAAO,CAEHV,EAAI,SAAW,YACf,IAAMU,EAAWH,EAAa,QAAQP,CAAG,EACzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DX,EAAI,SAAW,YACf,IAAMU,EAAWH,EAAa,QAAQP,CAAG,EACzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOO,EAAa,KAAK,EAI7B,IAAIK,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIZ,IACAY,EAAW,YAAY,EAAI,GAAGZ,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAE9DG,IACAS,EAAW,cAAc,EAAI,QAIjC,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOd,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMe,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBf,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAcc,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGlB,EAAI,MAAM,SAASY,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAIV,EAAaa,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGArB,EAAI,SAAW,SAASY,CAAW,MAGnC,IAAMF,EAAWH,EAAa,QAAQP,CAAG,EAGzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EAErGT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEzEG,GACAM,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAGxCA,CACX,CAEA,OAAOH,EAAa,KAAK,CAC7B","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
1
+ {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, -1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'cohere-ai', company: 'Cohere', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,YAAiB,QAAS,SAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,CAC5E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCJO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOO,EAAa,KAAK,EAI7B,GAAIP,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMU,EAAiBC,EAAgBX,CAAM,EACvCY,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIP,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFS,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIX,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOO,EAAa,KAAK,EAI7B,IAAIK,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOb,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASY,CAAW,MACnC,IAAMF,EAAWH,EAAa,QAAQP,CAAG,EAGzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EACrDE,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAClFG,GAAeM,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAWH,EAAa,KAAK,EACnC,OAAAG,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ontosdk/next",
3
- "version": "1.4.4",
3
+ "version": "1.5.1",
4
4
  "description": "Extracts semantic Markdown from React/Next.js pages for AI Agents",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
package/src/bots.ts CHANGED
@@ -8,6 +8,8 @@ export interface AiBot {
8
8
  name: string;
9
9
  /** The company operating this bot */
10
10
  company: string;
11
+ /** ISO date when this bot was added or last verified */
12
+ addedAt?: string;
11
13
  }
12
14
 
13
15
  /**
@@ -16,58 +18,75 @@ export interface AiBot {
16
18
  */
17
19
  export const AI_BOTS: AiBot[] = [
18
20
  // OpenAI
19
- { name: 'GPTBot', company: 'OpenAI' },
20
- { name: 'ChatGPT-User', company: 'OpenAI' },
21
- { name: 'ChatGPT', company: 'OpenAI' },
22
- { name: 'OAI-SearchBot', company: 'OpenAI' },
23
- { name: 'OpenAI', company: 'OpenAI' },
21
+ { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },
22
+ { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },
23
+ { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },
24
24
 
25
- // Google
26
- { name: 'Googlebot', company: 'Google' },
27
- { name: 'Google-CloudVertexBot', company: 'Google' },
28
- { name: 'Google-Extended', company: 'Google' },
29
- { name: 'GoogleOther', company: 'Google' },
25
+ // Google (SEO Safety: Googlebot proper is EXCLUDED)
26
+ { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },
27
+ { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },
28
+ { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },
30
29
 
31
30
  // Anthropic
32
- { name: 'ClaudeBot', company: 'Anthropic' },
33
- { name: 'Claude-User', company: 'Anthropic' },
34
- { name: 'Claude', company: 'Anthropic' },
35
- { name: 'anthropic-ai', company: 'Anthropic' },
36
- { name: 'Anthropic', company: 'Anthropic' },
31
+ { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },
32
+ { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },
33
+ { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },
37
34
 
38
35
  // Perplexity
39
- { name: 'PerplexityBot', company: 'Perplexity' },
40
- { name: 'Perplexity-User', company: 'Perplexity' },
41
- { name: 'Perplexity', company: 'Perplexity' },
36
+ { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },
37
+ { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },
42
38
 
43
39
  // Meta
44
- { name: 'Meta-ExternalAgent', company: 'Meta' },
45
- { name: 'Meta-ExternalFetcher', company: 'Meta' },
46
- { name: 'FacebookBot', company: 'Meta' },
40
+ { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },
41
+ { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },
42
+ { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },
43
+ { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },
47
44
 
48
- // Common Crawl (used by most smaller AI companies)
49
- { name: 'CCBot', company: 'Common Crawl' },
45
+ // Mistral
46
+ { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },
50
47
 
51
- // Other notable AI crawlers
52
- { name: 'Bytespider', company: 'ByteDance' },
53
- { name: 'Applebot-Extended', company: 'Apple' },
54
- { name: 'cohere-ai', company: 'Cohere' },
55
- { name: 'YouBot', company: 'You.com' },
48
+ // Amazon
49
+ { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },
50
+
51
+ // Others
52
+ { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },
53
+ { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },
54
+ { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },
55
+ { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },
56
+ { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },
57
+ { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },
58
+ { name: 'cohere-ai', company: 'Cohere', addedAt: '2025-01-01' },
59
+ { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },
56
60
  ];
57
61
 
58
62
  /**
59
63
  * Flat list of user-agent substrings for fast matching in the middleware.
60
64
  */
61
- export const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);
65
+ export const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();
66
+
67
+ function AI_BOT_USER_AGENTS_CACHE() {
68
+ return AI_BOTS.map(bot => bot.name);
69
+ }
62
70
 
63
71
  /**
64
72
  * Given a raw user-agent string, returns the matched AiBot entry or undefined.
65
- * Comparison is case-insensitive to handle inconsistent agent casing.
73
+ * Uses a "Longest Match" strategy to ensure maximum specificity.
74
+ * Comparison is case-insensitive.
66
75
  */
67
76
  export function matchBot(userAgent: string | null): AiBot | undefined {
68
77
  if (!userAgent) return undefined;
69
78
  const lowerUA = userAgent.toLowerCase();
70
- return AI_BOTS.find(bot =>
79
+
80
+ // Find all matches
81
+ const matches = AI_BOTS.filter(bot =>
71
82
  lowerUA.includes(bot.name.toLowerCase())
72
83
  );
84
+
85
+ if (matches.length === 0) return undefined;
86
+ if (matches.length === 1) return matches[0];
87
+
88
+ // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')
89
+ return matches.reduce((longest, current) =>
90
+ current.name.length > longest.name.length ? current : longest
91
+ );
73
92
  }
package/src/middleware.ts CHANGED
@@ -13,20 +13,17 @@ export async function ontoMiddleware(request: any, config?: OntoConfig) {
13
13
  const isAiBot = !!matched;
14
14
  const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;
15
15
 
16
- // If traffic is identified as an AI Bot or markdown is requested
16
+ // Common logic for bot/markdown negotiation
17
17
  if (isAiBot || isMarkdownRequested) {
18
-
19
- // Ignore internal next.js requests & static assets (but not llms.txt)
18
+ // Ignore internal next.js requests
20
19
  if (url.pathname.startsWith('/_next')) {
21
20
  return NextResponse.next();
22
21
  }
23
22
 
24
23
  // --- llms.txt Auto-Discovery ---
25
- // Serve the llms.txt manifest to AI agents
26
24
  if (url.pathname === '/llms.txt') {
27
25
  try {
28
26
  if (config) {
29
- // Generate llms.txt dynamically from config
30
27
  const llmsTxtContent = generateLlmsTxt(config);
31
28
  const response = new NextResponse(llmsTxtContent, {
32
29
  headers: {
@@ -34,137 +31,56 @@ export async function ontoMiddleware(request: any, config?: OntoConfig) {
34
31
  'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
35
32
  }
36
33
  });
37
-
38
- if (matched) {
39
- response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
40
- }
41
-
42
- return response;
43
- } else {
44
- // Fallback: try to serve static llms.txt from public folder
45
- url.pathname = '/llms.txt';
46
- const response = NextResponse.rewrite(url);
47
- response.headers.set('Content-Type', 'text/plain; charset=utf-8');
48
- response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');
49
- if (matched) {
50
- response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
51
- }
34
+ if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
35
+ response.headers.set('X-Onto-Trace', userAgent || 'no-ua');
52
36
  return response;
53
37
  }
54
38
  } catch (error) {
55
39
  console.error('[Onto] Failed to generate llms.txt:', error);
56
- // Fallback to static file on error
57
- url.pathname = '/llms.txt';
58
- const response = NextResponse.rewrite(url);
59
- response.headers.set('Content-Type', 'text/plain; charset=utf-8');
60
- if (matched) {
61
- response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
62
- }
63
- return response;
64
40
  }
65
41
  }
66
42
 
67
- // Skip other static assets
43
+ // Skip other static assets with dots
68
44
  if (url.pathname.includes('.')) {
69
45
  return NextResponse.next();
70
46
  }
71
47
 
72
48
  // Determine the corresponding payload path
73
49
  let payloadPath = url.pathname;
74
- if (payloadPath === '/' || payloadPath === '') {
75
- payloadPath = '/index';
76
- }
77
-
78
- // Strip trailing slash if present
79
- if (payloadPath.endsWith('/') && payloadPath !== '/') {
80
- payloadPath = payloadPath.slice(0, -1);
81
- }
82
-
83
- // Common response headers for all bot responses
84
- const botHeaders: Record<string, string> = {
85
- 'Content-Type': 'text/markdown; charset=utf-8',
86
- 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
87
- };
88
- if (matched) {
89
- botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;
90
- }
91
- if (hasDebugParam) {
92
- botHeaders['X-Onto-Debug'] = 'true';
93
- }
50
+ if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';
51
+ if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, -1);
94
52
 
95
53
  // --- Onto Control Plane Integration (Premium) ---
96
54
  const ONTO_API_KEY = process.env.ONTO_API_KEY;
97
55
  const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';
98
56
 
99
57
  if (ONTO_API_KEY) {
100
- // 1. Fire-and-forget tracking
58
+ // Logic for tracking and injection ... (fire-and-forget tracking)
101
59
  fetch(`${DASHBOARD_URL}/api/track`, {
102
60
  method: 'POST',
103
- headers: {
104
- 'x-onto-key': ONTO_API_KEY,
105
- 'Content-Type': 'application/json'
106
- },
107
- body: JSON.stringify({
108
- route: url.pathname,
109
- userAgent: userAgent,
110
- bot: matched ? matched.name : null,
111
- company: matched ? matched.company : null,
112
- })
61
+ headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },
62
+ body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })
113
63
  }).catch(() => {});
114
-
115
- // 2. Dynamic Context Injection
116
- try {
117
- const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {
118
- headers: { 'x-onto-key': ONTO_API_KEY },
119
- signal: AbortSignal.timeout(1500)
120
- });
121
-
122
- if (injectRes.ok) {
123
- const { injection } = await injectRes.json();
124
-
125
- if (injection) {
126
- const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;
127
- const mdRes = await fetch(localMdUrl);
128
-
129
- if (mdRes.ok) {
130
- const baseMarkdown = await mdRes.text();
131
- const finalMarkdown = `${baseMarkdown}\n\n---\n\n${injection}`;
132
-
133
- return new NextResponse(finalMarkdown, {
134
- headers: {
135
- ...botHeaders,
136
- 'X-Onto-Injected': 'true'
137
- }
138
- });
139
- }
140
- }
141
- }
142
- } catch (err) {
143
- console.error('[Onto] Injection failed', err);
144
- }
145
64
  }
146
- // ------------------------------------------------
147
65
 
66
+ // Rewrite to semantic payload
148
67
  url.pathname = `/.onto${payloadPath}.md`;
149
-
150
- // Rewrite implicitly serves the target URL transparently to the client.
151
68
  const response = NextResponse.rewrite(url);
152
69
 
153
- // Explicitly set headers on the rewrite response
70
+ // Apply headers
154
71
  response.headers.set('Content-Type', 'text/markdown; charset=utf-8');
155
72
  response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');
156
-
157
- if (matched) {
158
- response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
159
- }
160
- if (hasDebugParam) {
161
- response.headers.set('X-Onto-Debug', 'true');
162
- }
73
+ response.headers.set('X-Onto-Trace', userAgent || 'no-ua');
74
+ if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
75
+ if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');
163
76
 
164
77
  return response;
165
78
  }
166
79
 
167
- return NextResponse.next();
80
+ // Default response for non-bots
81
+ const response = NextResponse.next();
82
+ response.headers.set('X-Onto-Trace', userAgent || 'no-ua');
83
+ return response;
168
84
  }
169
85
 
170
86
  // Re-export the bot registry for consumers who want to extend or inspect it