@ontosdk/next 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- "use strict";var h=Object.defineProperty;var C=Object.getOwnPropertyDescriptor;var T=Object.getOwnPropertyNames;var G=Object.prototype.hasOwnProperty;var _=(t,e)=>{for(var n in e)h(t,n,{get:e[n],enumerable:!0})},$=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let a of T(e))!G.call(t,a)&&a!==n&&h(t,a,{get:()=>e[a],enumerable:!(o=C(e,a))||o.enumerable});return t};var P=t=>$(h({},"__esModule",{value:!0}),t);var U={};_(U,{AI_BOT_USER_AGENTS:()=>y,matchBot:()=>p,ontoMiddleware:()=>I});module.exports=P(U);var s=require("next/server");var u=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"OpenAI",company:"OpenAI",addedAt:"2025-03-25"},{name:"GPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"claude-fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"AnthropicFetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"Claude-Fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude",company:"Anthropic",addedAt:"2025-03-25"},{name:"Anthropic",company:"Anthropic",addedAt:"2025-03-25"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity",company:"Perplexity",addedAt:"2025-03-25"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Mistral",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"},{name:"python-requests",company:"Generic Bot",addedAt:"2025-03-25"},{name:"python-httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"axios",company:"Generic Bot",addedAt:"2025-03-25"},{name:"node-fetch",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Go-http-client",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Wget",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Curl",company:"Generic Bot",addedAt:"2025-03-25"}],y=b();function b(){return u.map(t=>t.name)}function p(t){if(!t)return;let e=t.toLowerCase(),n=u.filter(o=>e.includes(o.name.toLowerCase()));if(n.length!==0)return n.length===1?n[0]:n.reduce((o,a)=>a.name.length>o.name.length?a:o)}function g(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let n of t.routes){let o=`${t.baseUrl}${n.path}`;e.push(`- [${n.path}](${o}): ${n.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let n of t.externalLinks)n.description?e.push(`- [${n.title}](${n.url}): ${n.description}`):e.push(`- [${n.title}](${n.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let n of t.sections)e.push(`## ${n.heading}`),e.push(""),e.push(n.content),e.push("");return e.join(`
1
+ "use strict";var h=Object.defineProperty;var C=Object.getOwnPropertyDescriptor;var T=Object.getOwnPropertyNames;var G=Object.prototype.hasOwnProperty;var _=(t,e)=>{for(var n in e)h(t,n,{get:e[n],enumerable:!0})},$=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let a of T(e))!G.call(t,a)&&a!==n&&h(t,a,{get:()=>e[a],enumerable:!(o=C(e,a))||o.enumerable});return t};var I=t=>$(h({},"__esModule",{value:!0}),t);var U={};_(U,{AI_BOT_USER_AGENTS:()=>y,matchBot:()=>c,ontoMiddleware:()=>b});module.exports=I(U);var p=require("next/server");var u=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"OpenAI",company:"OpenAI",addedAt:"2025-03-25"},{name:"GPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"claude-fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"AnthropicFetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"Claude-Fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude",company:"Anthropic",addedAt:"2025-03-25"},{name:"Anthropic",company:"Anthropic",addedAt:"2025-03-25"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity",company:"Perplexity",addedAt:"2025-03-25"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Mistral",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"},{name:"python-requests",company:"Generic Bot",addedAt:"2025-03-25"},{name:"python-httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"axios",company:"Generic Bot",addedAt:"2025-03-25"},{name:"node-fetch",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Go-http-client",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Wget",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Curl",company:"Generic Bot",addedAt:"2025-03-25"}],y=P();function P(){return u.map(t=>t.name)}function c(t){if(!t)return;let e=t.toLowerCase(),n=u.filter(o=>e.includes(o.name.toLowerCase()));if(n.length!==0)return n.length===1?n[0]:n.reduce((o,a)=>a.name.length>o.name.length?a:o)}function g(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let n of t.routes){let o=`${t.baseUrl}${n.path}`;e.push(`- [${n.path}](${o}): ${n.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let n of t.externalLinks)n.description?e.push(`- [${n.title}](${n.url}): ${n.description}`):e.push(`- [${n.title}](${n.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let n of t.sections)e.push(`## ${n.heading}`),e.push(""),e.push(n.content),e.push("");return e.join(`
2
2
  `).trim()+`
3
- `}async function I(t,e){let n=t.headers.get("user-agent"),o=t.nextUrl.clone(),a=p(n),x=t.headers.get("accept")||"",l=t.nextUrl.searchParams.has("onto"),f=!!a,B=x.includes("text/markdown")||l;if(f||B){if(o.pathname.startsWith("/_next"))return s.NextResponse.next();if(o.pathname==="/llms.txt")try{if(e){let i=g(e),m=new s.NextResponse(i,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return a&&m.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),m.headers.set("X-Onto-Trace",n||"no-ua"),m}}catch(i){console.error("[Onto] Failed to generate llms.txt:",i)}if(o.pathname.includes("."))return s.NextResponse.next();let r=o.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let A=process.env.ONTO_API_KEY,O=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";A&&fetch(`${O}/api/track`,{method:"POST",headers:{"x-onto-key":A,"Content-Type":"application/json"},body:JSON.stringify({route:o.pathname,userAgent:n,bot:a?.name,company:a?.company})}).catch(()=>{}),o.pathname=`/.onto${r}.md`;let d=s.NextResponse.rewrite(o);return d.headers.set("Content-Type","text/markdown; charset=utf-8"),d.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),d.headers.set("Vary","User-Agent, Accept"),d.headers.set("X-Onto-Trace",n||"no-ua"),a&&(d.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),d.headers.set("X-Onto-Matched","true")),l&&d.headers.set("X-Onto-Debug","true"),d}let c=s.NextResponse.next();return c.headers.set("Vary","User-Agent, Accept"),c.headers.set("X-Onto-Trace",n||"no-ua"),c}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
3
+ `}async function b(t,e){let n=t.headers.get("user-agent"),o=t.nextUrl.clone(),a=c(n),x=t.headers.get("accept")||"",l=t.nextUrl.searchParams.has("onto"),f=!!a,O=x.includes("text/markdown")||l;if(f||O){if(o.pathname.startsWith("/_next"))return p.NextResponse.next();if(o.pathname==="/llms.txt")try{if(e){let i=g(e),m=new p.NextResponse(i,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return a&&m.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),m.headers.set("X-Onto-Trace",n||"no-ua"),m}}catch(i){console.error("[Onto] Failed to generate llms.txt:",i)}if(o.pathname.includes("."))return p.NextResponse.next();let r=o.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let A=process.env.ONTO_API_KEY,B=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";A&&fetch(`${B}/api/track`,{method:"POST",headers:{"x-onto-key":A,"Content-Type":"application/json"},body:JSON.stringify({route:o.pathname,userAgent:n,bot:a?.name,company:a?.company})}).catch(()=>{}),o.pathname=`/.onto${r}.md`;let d=p.NextResponse.rewrite(o);return d.headers.set("Content-Type","text/markdown; charset=utf-8"),d.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),d.headers.set("Vary","User-Agent, Accept"),d.headers.set("X-Onto-Trace",n||"no-ua"),a&&(d.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),d.headers.set("X-Onto-Matched","true")),l&&d.headers.set("X-Onto-Debug","true"),d}let s=p.NextResponse.next();return s.headers.set("Vary","User-Agent, Accept"),s.headers.set("X-Onto-Trace",n||"no-ua"),a?(s.headers.set("X-Onto-Matched-Bot",a.name),s.headers.set("X-Onto-Identified","true")):s.headers.set("X-Onto-Identified","false"),s}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
4
4
  //# sourceMappingURL=middleware.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, -1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Matched', 'true');\r\n }\r\n \r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n \r\n // Crucial: Tell Vercel/Edge to vary the cache by User-Agent \r\n // This ensures bots get the rewrite and humans get the HTML\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'OpenAI', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'GPT', company: 'OpenAI', addedAt: '2025-03-25' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'claude-fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'AnthropicFetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Claude-Fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Anthropic', company: 'Anthropic', addedAt: '2025-03-25' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity', company: 'Perplexity', addedAt: '2025-03-25' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n { name: 'Mistral', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n\n // Generic AI / Scraping Libraries (Last Resort)\n { name: 'python-requests', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'python-httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'axios', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'node-fetch', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Go-http-client', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Wget', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Curl', company: 'Generic Bot', addedAt: '2025-03-25' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,UAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,SAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,MAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,iBAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,SAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,YAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,aAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAChE,CAAE,KAAM,UAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,EAGxE,CAAE,KAAM,kBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,eAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,aAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,iBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,CAC9E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCvBO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAI7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIN,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFQ,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIW,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOZ,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASW,CAAW,MACnC,IAAMF,EAAW,eAAa,QAAQT,CAAG,EAGzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAErDE,IACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACzEQ,EAAS,QAAQ,IAAI,iBAAkB,MAAM,GAG7CL,GAAeK,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAW,eAAa,KAAK,EAInC,OAAAA,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAElDU,CACX","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
1
+ {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, -1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Matched', 'true');\r\n }\r\n \r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n \r\n // Add identify headers to EVERY response for clinical debugging\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Matched-Bot', matched.name);\r\n response.headers.set('X-Onto-Identified', 'true');\r\n } else {\r\n response.headers.set('X-Onto-Identified', 'false');\r\n }\r\n \r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'OpenAI', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'GPT', company: 'OpenAI', addedAt: '2025-03-25' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'claude-fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'AnthropicFetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Claude-Fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Anthropic', company: 'Anthropic', addedAt: '2025-03-25' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity', company: 'Perplexity', addedAt: '2025-03-25' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n { name: 'Mistral', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n\n // Generic AI / Scraping Libraries (Last Resort)\n { name: 'python-requests', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'python-httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'axios', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'node-fetch', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Go-http-client', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Wget', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Curl', company: 'Generic Bot', addedAt: '2025-03-25' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,UAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,SAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,MAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,iBAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,SAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,YAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,aAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAChE,CAAE,KAAM,UAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,EAGxE,CAAE,KAAM,kBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,eAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,aAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,iBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,CAC9E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCvBO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAI7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIN,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFQ,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIW,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOZ,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASW,CAAW,MACnC,IAAMF,EAAW,eAAa,QAAQT,CAAG,EAGzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAErDE,IACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACzEQ,EAAS,QAAQ,IAAI,iBAAkB,MAAM,GAG7CL,GAAeK,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAW,eAAa,KAAK,EAGnC,OAAAA,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAErDE,GACAQ,EAAS,QAAQ,IAAI,qBAAsBR,EAAQ,IAAI,EACvDQ,EAAS,QAAQ,IAAI,oBAAqB,MAAM,GAEhDA,EAAS,QAAQ,IAAI,oBAAqB,OAAO,EAG9CA,CACX","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
@@ -1,4 +1,4 @@
1
- import{NextResponse as s}from"next/server";var A=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"OpenAI",company:"OpenAI",addedAt:"2025-03-25"},{name:"GPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"claude-fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"AnthropicFetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"Claude-Fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude",company:"Anthropic",addedAt:"2025-03-25"},{name:"Anthropic",company:"Anthropic",addedAt:"2025-03-25"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity",company:"Perplexity",addedAt:"2025-03-25"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Mistral",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"},{name:"python-requests",company:"Generic Bot",addedAt:"2025-03-25"},{name:"python-httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"axios",company:"Generic Bot",addedAt:"2025-03-25"},{name:"node-fetch",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Go-http-client",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Wget",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Curl",company:"Generic Bot",addedAt:"2025-03-25"}],B=O();function O(){return A.map(n=>n.name)}function m(n){if(!n)return;let t=n.toLowerCase(),e=A.filter(a=>t.includes(a.name.toLowerCase()));if(e.length!==0)return e.length===1?e[0]:e.reduce((a,o)=>o.name.length>a.name.length?o:a)}function u(n){let t=[];if(t.push(`# ${n.name}`),t.push(""),t.push(`> ${n.summary}`),t.push(""),n.routes&&n.routes.length>0){t.push("## Key Routes"),t.push("");for(let e of n.routes){let a=`${n.baseUrl}${e.path}`;t.push(`- [${e.path}](${a}): ${e.description}`)}t.push("")}if(n.externalLinks&&n.externalLinks.length>0){t.push("## Resources"),t.push("");for(let e of n.externalLinks)e.description?t.push(`- [${e.title}](${e.url}): ${e.description}`):t.push(`- [${e.title}](${e.url})`);t.push("")}if(n.sections&&n.sections.length>0)for(let e of n.sections)t.push(`## ${e.heading}`),t.push(""),t.push(e.content),t.push("");return t.join(`
1
+ import{NextResponse as p}from"next/server";var A=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"OpenAI",company:"OpenAI",addedAt:"2025-03-25"},{name:"GPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"claude-fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"AnthropicFetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"Claude-Fetch",company:"Anthropic",addedAt:"2025-03-25"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude",company:"Anthropic",addedAt:"2025-03-25"},{name:"Anthropic",company:"Anthropic",addedAt:"2025-03-25"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity",company:"Perplexity",addedAt:"2025-03-25"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Mistral",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"},{name:"python-requests",company:"Generic Bot",addedAt:"2025-03-25"},{name:"python-httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"httpx",company:"Generic Bot",addedAt:"2025-03-25"},{name:"axios",company:"Generic Bot",addedAt:"2025-03-25"},{name:"node-fetch",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Go-http-client",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Wget",company:"Generic Bot",addedAt:"2025-03-25"},{name:"Curl",company:"Generic Bot",addedAt:"2025-03-25"}],O=B();function B(){return A.map(n=>n.name)}function m(n){if(!n)return;let t=n.toLowerCase(),e=A.filter(a=>t.includes(a.name.toLowerCase()));if(e.length!==0)return e.length===1?e[0]:e.reduce((a,o)=>o.name.length>a.name.length?o:a)}function u(n){let t=[];if(t.push(`# ${n.name}`),t.push(""),t.push(`> ${n.summary}`),t.push(""),n.routes&&n.routes.length>0){t.push("## Key Routes"),t.push("");for(let e of n.routes){let a=`${n.baseUrl}${e.path}`;t.push(`- [${e.path}](${a}): ${e.description}`)}t.push("")}if(n.externalLinks&&n.externalLinks.length>0){t.push("## Resources"),t.push("");for(let e of n.externalLinks)e.description?t.push(`- [${e.title}](${e.url}): ${e.description}`):t.push(`- [${e.title}](${e.url})`);t.push("")}if(n.sections&&n.sections.length>0)for(let e of n.sections)t.push(`## ${e.heading}`),t.push(""),t.push(e.content),t.push("");return t.join(`
2
2
  `).trim()+`
3
- `}async function I(n,t){let e=n.headers.get("user-agent"),a=n.nextUrl.clone(),o=m(e),y=n.headers.get("accept")||"",h=n.nextUrl.searchParams.has("onto"),g=!!o,x=y.includes("text/markdown")||h;if(g||x){if(a.pathname.startsWith("/_next"))return s.next();if(a.pathname==="/llms.txt")try{if(t){let c=u(t),i=new s(c,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return o&&i.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),i.headers.set("X-Onto-Trace",e||"no-ua"),i}}catch(c){console.error("[Onto] Failed to generate llms.txt:",c)}if(a.pathname.includes("."))return s.next();let r=a.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let l=process.env.ONTO_API_KEY,f=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";l&&fetch(`${f}/api/track`,{method:"POST",headers:{"x-onto-key":l,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:e,bot:o?.name,company:o?.company})}).catch(()=>{}),a.pathname=`/.onto${r}.md`;let d=s.rewrite(a);return d.headers.set("Content-Type","text/markdown; charset=utf-8"),d.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),d.headers.set("Vary","User-Agent, Accept"),d.headers.set("X-Onto-Trace",e||"no-ua"),o&&(d.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),d.headers.set("X-Onto-Matched","true")),h&&d.headers.set("X-Onto-Debug","true"),d}let p=s.next();return p.headers.set("Vary","User-Agent, Accept"),p.headers.set("X-Onto-Trace",e||"no-ua"),p}export{B as AI_BOT_USER_AGENTS,m as matchBot,I as ontoMiddleware};
3
+ `}async function b(n,t){let e=n.headers.get("user-agent"),a=n.nextUrl.clone(),o=m(e),y=n.headers.get("accept")||"",h=n.nextUrl.searchParams.has("onto"),g=!!o,x=y.includes("text/markdown")||h;if(g||x){if(a.pathname.startsWith("/_next"))return p.next();if(a.pathname==="/llms.txt")try{if(t){let c=u(t),i=new p(c,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return o&&i.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),i.headers.set("X-Onto-Trace",e||"no-ua"),i}}catch(c){console.error("[Onto] Failed to generate llms.txt:",c)}if(a.pathname.includes("."))return p.next();let r=a.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let l=process.env.ONTO_API_KEY,f=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";l&&fetch(`${f}/api/track`,{method:"POST",headers:{"x-onto-key":l,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:e,bot:o?.name,company:o?.company})}).catch(()=>{}),a.pathname=`/.onto${r}.md`;let d=p.rewrite(a);return d.headers.set("Content-Type","text/markdown; charset=utf-8"),d.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),d.headers.set("Vary","User-Agent, Accept"),d.headers.set("X-Onto-Trace",e||"no-ua"),o&&(d.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),d.headers.set("X-Onto-Matched","true")),h&&d.headers.set("X-Onto-Debug","true"),d}let s=p.next();return s.headers.set("Vary","User-Agent, Accept"),s.headers.set("X-Onto-Trace",e||"no-ua"),o?(s.headers.set("X-Onto-Matched-Bot",o.name),s.headers.set("X-Onto-Identified","true")):s.headers.set("X-Onto-Identified","false"),s}export{O as AI_BOT_USER_AGENTS,m as matchBot,b as ontoMiddleware};
4
4
  //# sourceMappingURL=middleware.mjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, -1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Matched', 'true');\r\n }\r\n \r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n \r\n // Crucial: Tell Vercel/Edge to vary the cache by User-Agent \r\n // This ensures bots get the rewrite and humans get the HTML\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'OpenAI', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'GPT', company: 'OpenAI', addedAt: '2025-03-25' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'claude-fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'AnthropicFetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Claude-Fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Anthropic', company: 'Anthropic', addedAt: '2025-03-25' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity', company: 'Perplexity', addedAt: '2025-03-25' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n { name: 'Mistral', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n\n // Generic AI / Scraping Libraries (Last Resort)\n { name: 'python-requests', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'python-httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'axios', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'node-fetch', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Go-http-client', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Wget', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Curl', company: 'Generic Bot', addedAt: '2025-03-25' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,UAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,SAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,MAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,iBAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,SAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,YAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,aAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAChE,CAAE,KAAM,UAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,EAGxE,CAAE,KAAM,kBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,eAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,aAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,iBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,CAC9E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCvBO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOO,EAAa,KAAK,EAI7B,GAAIP,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMU,EAAiBC,EAAgBX,CAAM,EACvCY,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIP,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFS,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIX,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOO,EAAa,KAAK,EAI7B,IAAIK,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOb,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASY,CAAW,MACnC,IAAMF,EAAWH,EAAa,QAAQP,CAAG,EAGzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAErDE,IACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACzES,EAAS,QAAQ,IAAI,iBAAkB,MAAM,GAG7CN,GAAeM,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAWH,EAAa,KAAK,EAInC,OAAAG,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAElDW,CACX","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
1
+ {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, -1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Matched', 'true');\r\n }\r\n \r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n \r\n // Add identify headers to EVERY response for clinical debugging\r\n response.headers.set('Vary', 'User-Agent, Accept');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Matched-Bot', matched.name);\r\n response.headers.set('X-Onto-Identified', 'true');\r\n } else {\r\n response.headers.set('X-Onto-Identified', 'false');\r\n }\r\n \r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'OpenAI', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'GPT', company: 'OpenAI', addedAt: '2025-03-25' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'claude-fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'AnthropicFetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Claude-Fetch', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Anthropic', company: 'Anthropic', addedAt: '2025-03-25' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity', company: 'Perplexity', addedAt: '2025-03-25' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n { name: 'Mistral', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n\n // Generic AI / Scraping Libraries (Last Resort)\n { name: 'python-requests', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'python-httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'httpx', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'axios', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'node-fetch', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Go-http-client', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Wget', company: 'Generic Bot', addedAt: '2025-03-25' },\n { name: 'Curl', company: 'Generic Bot', addedAt: '2025-03-25' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,UAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,SAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,MAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,iBAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,SAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,YAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,aAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAChE,CAAE,KAAM,UAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,EAGxE,CAAE,KAAM,kBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,eAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,QAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,aAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,iBAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,EAC1E,CAAE,KAAM,OAAoB,QAAS,cAAe,QAAS,YAAa,CAC9E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCvBO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOO,EAAa,KAAK,EAI7B,GAAIP,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMU,EAAiBC,EAAgBX,CAAM,EACvCY,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIP,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFS,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIX,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOO,EAAa,KAAK,EAI7B,IAAIK,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOb,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASY,CAAW,MACnC,IAAMF,EAAWH,EAAa,QAAQP,CAAG,EAGzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAErDE,IACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACzES,EAAS,QAAQ,IAAI,iBAAkB,MAAM,GAG7CN,GAAeM,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAWH,EAAa,KAAK,EAGnC,OAAAG,EAAS,QAAQ,IAAI,OAAQ,oBAAoB,EACjDA,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAErDE,GACAS,EAAS,QAAQ,IAAI,qBAAsBT,EAAQ,IAAI,EACvDS,EAAS,QAAQ,IAAI,oBAAqB,MAAM,GAEhDA,EAAS,QAAQ,IAAI,oBAAqB,OAAO,EAG9CA,CACX","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ontosdk/next",
3
- "version": "1.5.6",
3
+ "version": "1.5.7",
4
4
  "description": "Extracts semantic Markdown from React/Next.js pages for AI Agents",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
package/src/middleware.ts CHANGED
@@ -86,11 +86,17 @@ export async function ontoMiddleware(request: any, config?: OntoConfig) {
86
86
  // Default response for non-bots
87
87
  const response = NextResponse.next();
88
88
 
89
- // Crucial: Tell Vercel/Edge to vary the cache by User-Agent
90
- // This ensures bots get the rewrite and humans get the HTML
89
+ // Add identify headers to EVERY response for clinical debugging
91
90
  response.headers.set('Vary', 'User-Agent, Accept');
92
91
  response.headers.set('X-Onto-Trace', userAgent || 'no-ua');
93
92
 
93
+ if (matched) {
94
+ response.headers.set('X-Onto-Matched-Bot', matched.name);
95
+ response.headers.set('X-Onto-Identified', 'true');
96
+ } else {
97
+ response.headers.set('X-Onto-Identified', 'false');
98
+ }
99
+
94
100
  return response;
95
101
  }
96
102
 
@@ -1,27 +0,0 @@
1
- import * as react_jsx_runtime from 'react/jsx-runtime';
2
-
3
- /**
4
- * OntoHead — Auto-Discovery component for AI agents.
5
- *
6
- * Injects `<link rel="alternate">` tags into the page `<head>` so AI crawlers
7
- * can discover the optimized markdown endpoint for the current route.
8
- *
9
- * Usage in a Next.js App Router layout:
10
- * ```tsx
11
- * import { OntoHead } from '@ontosdk/next/components';
12
- *
13
- * export default function RootLayout({ children }) {
14
- * return (
15
- * <html>
16
- * <head>
17
- * <OntoHead />
18
- * </head>
19
- * <body>{children}</body>
20
- * </html>
21
- * );
22
- * }
23
- * ```
24
- */
25
- declare function OntoHead(): react_jsx_runtime.JSX.Element;
26
-
27
- export { OntoHead };
@@ -1,27 +0,0 @@
1
- import * as react_jsx_runtime from 'react/jsx-runtime';
2
-
3
- /**
4
- * OntoHead — Auto-Discovery component for AI agents.
5
- *
6
- * Injects `<link rel="alternate">` tags into the page `<head>` so AI crawlers
7
- * can discover the optimized markdown endpoint for the current route.
8
- *
9
- * Usage in a Next.js App Router layout:
10
- * ```tsx
11
- * import { OntoHead } from '@ontosdk/next/components';
12
- *
13
- * export default function RootLayout({ children }) {
14
- * return (
15
- * <html>
16
- * <head>
17
- * <OntoHead />
18
- * </head>
19
- * <body>{children}</body>
20
- * </html>
21
- * );
22
- * }
23
- * ```
24
- */
25
- declare function OntoHead(): react_jsx_runtime.JSX.Element;
26
-
27
- export { OntoHead };
@@ -1,52 +0,0 @@
1
- import * as react_jsx_runtime from 'react/jsx-runtime';
2
- import { ReactNode } from 'react';
3
- import { OntoConfig } from './config.mjs';
4
-
5
- interface OntoProviderProps {
6
- /**
7
- * The base URL of your site (e.g., 'https://example.com')
8
- * Used to construct the full href for the AI discovery link tag.
9
- */
10
- baseUrl: string;
11
- /**
12
- * Child components to render
13
- */
14
- children: ReactNode;
15
- /**
16
- * Optional: Onto configuration for automatic JSON-LD schema injection
17
- * If provided, the provider will automatically inject JSON-LD schemas
18
- * based on the page type configuration
19
- */
20
- config?: OntoConfig;
21
- }
22
- /**
23
- * OntoProvider — Automatic AI Discovery Provider
24
- *
25
- * Wraps your application and automatically injects:
26
- * 1. `<link rel="alternate">` tags for AI discovery
27
- * 2. JSON-LD structured data schemas based on page type
28
- *
29
- * With config, automatically generates JSON-LD schemas:
30
- * - 'scoring' pages get Methodology schema with AIO weights (40/35/25)
31
- * - 'about' pages get Organization/AboutPage schema
32
- *
33
- * Usage in a Next.js App Router layout:
34
- * ```tsx
35
- * import { OntoProvider } from '@ontosdk/next/provider';
36
- * import config from '../onto.config';
37
- *
38
- * export default function RootLayout({ children }) {
39
- * return (
40
- * <OntoProvider baseUrl="https://example.com" config={config}>
41
- * <html>
42
- * <head />
43
- * <body>{children}</body>
44
- * </html>
45
- * </OntoProvider>
46
- * );
47
- * }
48
- * ```
49
- */
50
- declare function OntoProvider({ baseUrl, children, config }: OntoProviderProps): react_jsx_runtime.JSX.Element;
51
-
52
- export { OntoProvider, type OntoProviderProps };
@@ -1,52 +0,0 @@
1
- import * as react_jsx_runtime from 'react/jsx-runtime';
2
- import { ReactNode } from 'react';
3
- import { OntoConfig } from './config.js';
4
-
5
- interface OntoProviderProps {
6
- /**
7
- * The base URL of your site (e.g., 'https://example.com')
8
- * Used to construct the full href for the AI discovery link tag.
9
- */
10
- baseUrl: string;
11
- /**
12
- * Child components to render
13
- */
14
- children: ReactNode;
15
- /**
16
- * Optional: Onto configuration for automatic JSON-LD schema injection
17
- * If provided, the provider will automatically inject JSON-LD schemas
18
- * based on the page type configuration
19
- */
20
- config?: OntoConfig;
21
- }
22
- /**
23
- * OntoProvider — Automatic AI Discovery Provider
24
- *
25
- * Wraps your application and automatically injects:
26
- * 1. `<link rel="alternate">` tags for AI discovery
27
- * 2. JSON-LD structured data schemas based on page type
28
- *
29
- * With config, automatically generates JSON-LD schemas:
30
- * - 'scoring' pages get Methodology schema with AIO weights (40/35/25)
31
- * - 'about' pages get Organization/AboutPage schema
32
- *
33
- * Usage in a Next.js App Router layout:
34
- * ```tsx
35
- * import { OntoProvider } from '@ontosdk/next/provider';
36
- * import config from '../onto.config';
37
- *
38
- * export default function RootLayout({ children }) {
39
- * return (
40
- * <OntoProvider baseUrl="https://example.com" config={config}>
41
- * <html>
42
- * <head />
43
- * <body>{children}</body>
44
- * </html>
45
- * </OntoProvider>
46
- * );
47
- * }
48
- * ```
49
- */
50
- declare function OntoProvider({ baseUrl, children, config }: OntoProviderProps): react_jsx_runtime.JSX.Element;
51
-
52
- export { OntoProvider, type OntoProviderProps };
package/dist/cli.d.mts DELETED
@@ -1 +0,0 @@
1
- #!/usr/bin/env node
package/dist/cli.d.ts DELETED
@@ -1 +0,0 @@
1
- #!/usr/bin/env node
package/dist/config.d.mts DELETED
@@ -1,80 +0,0 @@
1
- /**
2
- * Configuration schema for onto.config.ts
3
- * Used to dynamically generate llms.txt and other AI discovery files
4
- */
5
- type PageType = 'scoring' | 'about' | 'default';
6
- interface OntoRoute {
7
- /**
8
- * The URL path (e.g., '/docs', '/api/reference')
9
- */
10
- path: string;
11
- /**
12
- * Description of what this route contains
13
- */
14
- description: string;
15
- /**
16
- * Optional: Page type for automatic JSON-LD schema injection
17
- * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)
18
- * - 'about': Injects Organization/AboutPage schema
19
- * - 'default': No automatic schema injection
20
- */
21
- pageType?: PageType;
22
- }
23
- interface OntoConfig {
24
- /**
25
- * The name of your project or site (required)
26
- * Used as the H1 heading in llms.txt
27
- */
28
- name: string;
29
- /**
30
- * A short summary of your project (required)
31
- * Displayed as a blockquote in llms.txt
32
- * Should contain key information necessary for understanding the rest of the file
33
- */
34
- summary: string;
35
- /**
36
- * The base URL of your site (e.g., 'https://example.com')
37
- */
38
- baseUrl: string;
39
- /**
40
- * Optional: Additional sections to include in llms.txt
41
- * Each section can contain any markdown content
42
- */
43
- sections?: {
44
- heading: string;
45
- content: string;
46
- }[];
47
- /**
48
- * Key routes that AI agents should know about
49
- * These will be formatted as a markdown list in llms.txt
50
- */
51
- routes?: OntoRoute[];
52
- /**
53
- * Optional: Links to external resources (documentation, API references, etc.)
54
- */
55
- externalLinks?: {
56
- title: string;
57
- url: string;
58
- description?: string;
59
- }[];
60
- /**
61
- * Optional: Organization information for JSON-LD schemas
62
- */
63
- organization?: {
64
- name: string;
65
- description?: string;
66
- url?: string;
67
- logo?: string;
68
- foundingDate?: string;
69
- };
70
- }
71
- /**
72
- * Generate llms.txt content from OntoConfig
73
- * Follows the llms.txt specification:
74
- * - H1 with project name
75
- * - Blockquote with summary
76
- * - Additional markdown sections
77
- */
78
- declare function generateLlmsTxt(config: OntoConfig): string;
79
-
80
- export { type OntoConfig, type OntoRoute, type PageType, generateLlmsTxt };
package/dist/config.d.ts DELETED
@@ -1,80 +0,0 @@
1
- /**
2
- * Configuration schema for onto.config.ts
3
- * Used to dynamically generate llms.txt and other AI discovery files
4
- */
5
- type PageType = 'scoring' | 'about' | 'default';
6
- interface OntoRoute {
7
- /**
8
- * The URL path (e.g., '/docs', '/api/reference')
9
- */
10
- path: string;
11
- /**
12
- * Description of what this route contains
13
- */
14
- description: string;
15
- /**
16
- * Optional: Page type for automatic JSON-LD schema injection
17
- * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)
18
- * - 'about': Injects Organization/AboutPage schema
19
- * - 'default': No automatic schema injection
20
- */
21
- pageType?: PageType;
22
- }
23
- interface OntoConfig {
24
- /**
25
- * The name of your project or site (required)
26
- * Used as the H1 heading in llms.txt
27
- */
28
- name: string;
29
- /**
30
- * A short summary of your project (required)
31
- * Displayed as a blockquote in llms.txt
32
- * Should contain key information necessary for understanding the rest of the file
33
- */
34
- summary: string;
35
- /**
36
- * The base URL of your site (e.g., 'https://example.com')
37
- */
38
- baseUrl: string;
39
- /**
40
- * Optional: Additional sections to include in llms.txt
41
- * Each section can contain any markdown content
42
- */
43
- sections?: {
44
- heading: string;
45
- content: string;
46
- }[];
47
- /**
48
- * Key routes that AI agents should know about
49
- * These will be formatted as a markdown list in llms.txt
50
- */
51
- routes?: OntoRoute[];
52
- /**
53
- * Optional: Links to external resources (documentation, API references, etc.)
54
- */
55
- externalLinks?: {
56
- title: string;
57
- url: string;
58
- description?: string;
59
- }[];
60
- /**
61
- * Optional: Organization information for JSON-LD schemas
62
- */
63
- organization?: {
64
- name: string;
65
- description?: string;
66
- url?: string;
67
- logo?: string;
68
- foundingDate?: string;
69
- };
70
- }
71
- /**
72
- * Generate llms.txt content from OntoConfig
73
- * Follows the llms.txt specification:
74
- * - H1 with project name
75
- * - Blockquote with summary
76
- * - Additional markdown sections
77
- */
78
- declare function generateLlmsTxt(config: OntoConfig): string;
79
-
80
- export { type OntoConfig, type OntoRoute, type PageType, generateLlmsTxt };
package/dist/index.d.mts DELETED
@@ -1,25 +0,0 @@
1
- export { OntoConfig, OntoConfig as OntoConfigType, OntoRoute, OntoRoute as OntoRouteType, PageType, generateLlmsTxt } from './config.mjs';
2
- export { AIOMethodologySchema, AboutPageSchema, OrganizationSchema, generateAIOMethodologySchema, generateAboutPageSchema, generateOrganizationSchema, generateSchemaForPageType, serializeSchema } from './schemas.mjs';
3
-
4
- interface ExtractionResult {
5
- markdown: string;
6
- metadata: {
7
- title: string;
8
- description: string;
9
- jsonLd: any[];
10
- };
11
- stats: {
12
- originalHtmlSize: number;
13
- markdownSize: number;
14
- tokenReductionRatio: number;
15
- };
16
- }
17
- /**
18
- * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.
19
- * @param html The raw HTML string.
20
- * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.
21
- * @returns {ExtractionResult} The extracted payload.
22
- */
23
- declare function extractContent(html: string, sourceUrl?: string): ExtractionResult;
24
-
25
- export { extractContent };
package/dist/index.d.ts DELETED
@@ -1,25 +0,0 @@
1
- export { OntoConfig, OntoConfig as OntoConfigType, OntoRoute, OntoRoute as OntoRouteType, PageType, generateLlmsTxt } from './config.js';
2
- export { AIOMethodologySchema, AboutPageSchema, OrganizationSchema, generateAIOMethodologySchema, generateAboutPageSchema, generateOrganizationSchema, generateSchemaForPageType, serializeSchema } from './schemas.js';
3
-
4
- interface ExtractionResult {
5
- markdown: string;
6
- metadata: {
7
- title: string;
8
- description: string;
9
- jsonLd: any[];
10
- };
11
- stats: {
12
- originalHtmlSize: number;
13
- markdownSize: number;
14
- tokenReductionRatio: number;
15
- };
16
- }
17
- /**
18
- * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.
19
- * @param html The raw HTML string.
20
- * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.
21
- * @returns {ExtractionResult} The extracted payload.
22
- */
23
- declare function extractContent(html: string, sourceUrl?: string): ExtractionResult;
24
-
25
- export { extractContent };
@@ -1,29 +0,0 @@
1
- import { NextResponse } from 'next/server';
2
- import { OntoConfig } from './config.mjs';
3
-
4
- /**
5
- * Comprehensive registry of AI bot user-agent strings.
6
- * The middleware uses this list to detect AI crawlers and serve optimized markdown.
7
- */
8
- interface AiBot {
9
- /** The user-agent substring to match against */
10
- name: string;
11
- /** The company operating this bot */
12
- company: string;
13
- /** ISO date when this bot was added or last verified */
14
- addedAt?: string;
15
- }
16
- /**
17
- * Flat list of user-agent substrings for fast matching in the middleware.
18
- */
19
- declare const AI_BOT_USER_AGENTS: string[];
20
- /**
21
- * Given a raw user-agent string, returns the matched AiBot entry or undefined.
22
- * Uses a "Longest Match" strategy to ensure maximum specificity.
23
- * Comparison is case-insensitive.
24
- */
25
- declare function matchBot(userAgent: string | null): AiBot | undefined;
26
-
27
- declare function ontoMiddleware(request: any, config?: OntoConfig): Promise<NextResponse<unknown>>;
28
-
29
- export { AI_BOT_USER_AGENTS, type AiBot, matchBot, ontoMiddleware };
@@ -1,29 +0,0 @@
1
- import { NextResponse } from 'next/server';
2
- import { OntoConfig } from './config.js';
3
-
4
- /**
5
- * Comprehensive registry of AI bot user-agent strings.
6
- * The middleware uses this list to detect AI crawlers and serve optimized markdown.
7
- */
8
- interface AiBot {
9
- /** The user-agent substring to match against */
10
- name: string;
11
- /** The company operating this bot */
12
- company: string;
13
- /** ISO date when this bot was added or last verified */
14
- addedAt?: string;
15
- }
16
- /**
17
- * Flat list of user-agent substrings for fast matching in the middleware.
18
- */
19
- declare const AI_BOT_USER_AGENTS: string[];
20
- /**
21
- * Given a raw user-agent string, returns the matched AiBot entry or undefined.
22
- * Uses a "Longest Match" strategy to ensure maximum specificity.
23
- * Comparison is case-insensitive.
24
- */
25
- declare function matchBot(userAgent: string | null): AiBot | undefined;
26
-
27
- declare function ontoMiddleware(request: any, config?: OntoConfig): Promise<NextResponse<unknown>>;
28
-
29
- export { AI_BOT_USER_AGENTS, type AiBot, matchBot, ontoMiddleware };
@@ -1,72 +0,0 @@
1
- import { OntoConfig } from './config.mjs';
2
-
3
- /**
4
- * JSON-LD Schema generators for automatic structured data injection
5
- * Follows Schema.org standards for AI-friendly metadata
6
- */
7
-
8
- /**
9
- * Standard AIO (AI Optimization) scoring methodology
10
- * Based on the Onto scoring algorithm:
11
- * - React Tax (Efficiency): 40% (Step 1)
12
- * - Semantic Richness: 35% (Step 2)
13
- * - Content Negotiation: 25% (Step 3)
14
- */
15
- interface AIOMethodologySchema {
16
- '@context': 'https://schema.org';
17
- '@type': 'HowTo';
18
- name: string;
19
- description: string;
20
- step: Array<{
21
- '@type': 'HowToStep';
22
- name: string;
23
- text: string;
24
- position: number;
25
- }>;
26
- }
27
- /**
28
- * Generate AIO Scoring Methodology JSON-LD schema
29
- * This explains to AI agents how the scoring system works
30
- */
31
- declare function generateAIOMethodologySchema(config: OntoConfig, pageUrl: string): AIOMethodologySchema;
32
- /**
33
- * Organization schema for About pages
34
- */
35
- interface OrganizationSchema {
36
- '@context': 'https://schema.org';
37
- '@type': 'Organization';
38
- name: string;
39
- url?: string;
40
- description?: string;
41
- logo?: string;
42
- foundingDate?: string;
43
- }
44
- /**
45
- * Generate Organization JSON-LD schema for About pages
46
- */
47
- declare function generateOrganizationSchema(config: OntoConfig, pageUrl: string): OrganizationSchema | null;
48
- /**
49
- * AboutPage schema combining Organization and WebPage
50
- */
51
- interface AboutPageSchema {
52
- '@context': 'https://schema.org';
53
- '@type': 'AboutPage';
54
- name: string;
55
- url: string;
56
- description?: string;
57
- mainEntity?: OrganizationSchema;
58
- }
59
- /**
60
- * Generate AboutPage JSON-LD schema
61
- */
62
- declare function generateAboutPageSchema(config: OntoConfig, pageUrl: string): AboutPageSchema;
63
- /**
64
- * Determine which schema to generate based on page type
65
- */
66
- declare function generateSchemaForPageType(pageType: 'scoring' | 'about' | 'default', config: OntoConfig, pageUrl: string): any | null;
67
- /**
68
- * Serialize schema to JSON-LD script tag content
69
- */
70
- declare function serializeSchema(schema: any | null): string | null;
71
-
72
- export { type AIOMethodologySchema, type AboutPageSchema, type OrganizationSchema, generateAIOMethodologySchema, generateAboutPageSchema, generateOrganizationSchema, generateSchemaForPageType, serializeSchema };
package/dist/schemas.d.ts DELETED
@@ -1,72 +0,0 @@
1
- import { OntoConfig } from './config.js';
2
-
3
- /**
4
- * JSON-LD Schema generators for automatic structured data injection
5
- * Follows Schema.org standards for AI-friendly metadata
6
- */
7
-
8
- /**
9
- * Standard AIO (AI Optimization) scoring methodology
10
- * Based on the Onto scoring algorithm:
11
- * - React Tax (Efficiency): 40% (Step 1)
12
- * - Semantic Richness: 35% (Step 2)
13
- * - Content Negotiation: 25% (Step 3)
14
- */
15
- interface AIOMethodologySchema {
16
- '@context': 'https://schema.org';
17
- '@type': 'HowTo';
18
- name: string;
19
- description: string;
20
- step: Array<{
21
- '@type': 'HowToStep';
22
- name: string;
23
- text: string;
24
- position: number;
25
- }>;
26
- }
27
- /**
28
- * Generate AIO Scoring Methodology JSON-LD schema
29
- * This explains to AI agents how the scoring system works
30
- */
31
- declare function generateAIOMethodologySchema(config: OntoConfig, pageUrl: string): AIOMethodologySchema;
32
- /**
33
- * Organization schema for About pages
34
- */
35
- interface OrganizationSchema {
36
- '@context': 'https://schema.org';
37
- '@type': 'Organization';
38
- name: string;
39
- url?: string;
40
- description?: string;
41
- logo?: string;
42
- foundingDate?: string;
43
- }
44
- /**
45
- * Generate Organization JSON-LD schema for About pages
46
- */
47
- declare function generateOrganizationSchema(config: OntoConfig, pageUrl: string): OrganizationSchema | null;
48
- /**
49
- * AboutPage schema combining Organization and WebPage
50
- */
51
- interface AboutPageSchema {
52
- '@context': 'https://schema.org';
53
- '@type': 'AboutPage';
54
- name: string;
55
- url: string;
56
- description?: string;
57
- mainEntity?: OrganizationSchema;
58
- }
59
- /**
60
- * Generate AboutPage JSON-LD schema
61
- */
62
- declare function generateAboutPageSchema(config: OntoConfig, pageUrl: string): AboutPageSchema;
63
- /**
64
- * Determine which schema to generate based on page type
65
- */
66
- declare function generateSchemaForPageType(pageType: 'scoring' | 'about' | 'default', config: OntoConfig, pageUrl: string): any | null;
67
- /**
68
- * Serialize schema to JSON-LD script tag content
69
- */
70
- declare function serializeSchema(schema: any | null): string | null;
71
-
72
- export { type AIOMethodologySchema, type AboutPageSchema, type OrganizationSchema, generateAIOMethodologySchema, generateAboutPageSchema, generateOrganizationSchema, generateSchemaForPageType, serializeSchema };