@ontosdk/next 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,14 +1,14 @@
1
1
  #!/usr/bin/env node
2
- "use strict";var L=Object.create;var P=Object.defineProperty;var N=Object.getOwnPropertyDescriptor;var U=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,E=Object.prototype.hasOwnProperty;var M=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let r of U(e))!E.call(t,r)&&r!==n&&P(t,r,{get:()=>e[r],enumerable:!(o=N(e,r))||o.enumerable});return t};var O=(t,e,n)=>(n=t!=null?L(_(t)):{},M(e||!t||!t.__esModule?P(n,"default",{value:t,enumerable:!0}):n,t));var T=require("glob"),i=O(require("fs")),l=O(require("path")),s=O(require("picocolors"));var v=O(require("cheerio")),b=O(require("turndown")),A=new b.default({headingStyle:"atx",codeBlockStyle:"fenced"});function j(t,e="Generated Output"){let n=t.length,o=v.load(t),r=o("title").text()||o("h1").first().text()||"Untitled Page",a=o('meta[name="description"]').attr("content")||"No description found.",c=[];o('script[type="application/ld+json"]').each((d,p)=>{try{let h=o(p).html()||"",y=JSON.parse(h);c.push(y)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let u="";o("main").length>0?u=o("main").html()||"":o("article").length>0?u=o("article").html()||"":u=o("body").html()||"";let f=A.turndown(u),g=[`# ${r}`,`> ${a}`,"",`**Source:** ${e}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
3
- `)+f;c.length>0&&(g+=`
2
+ "use strict";var L=Object.create;var P=Object.defineProperty;var N=Object.getOwnPropertyDescriptor;var U=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,E=Object.prototype.hasOwnProperty;var M=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let r of U(e))!E.call(t,r)&&r!==n&&P(t,r,{get:()=>e[r],enumerable:!(o=N(e,r))||o.enumerable});return t};var O=(t,e,n)=>(n=t!=null?L(_(t)):{},M(e||!t||!t.__esModule?P(n,"default",{value:t,enumerable:!0}):n,t));var T=require("glob"),i=O(require("fs")),m=O(require("path")),s=O(require("picocolors"));var v=O(require("cheerio")),b=O(require("turndown")),A=new b.default({headingStyle:"atx",codeBlockStyle:"fenced"});function j(t,e="Generated Output"){let n=t.length,o=v.load(t),r=o("title").text()||o("h1").first().text()||"Untitled Page",l=o('meta[name="description"]').attr("content")||"No description found.",a=[];o('script[type="application/ld+json"]').each((g,p)=>{try{let h=o(p).html()||"",x=JSON.parse(h);a.push(x)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let d="";o("main").length>0?d=o("main").html()||"":o("article").length>0?d=o("article").html()||"":d=o("body").html()||"";let f=A.turndown(d),u=[`# ${r}`,`> ${l}`,"",`**Source:** ${e}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
3
+ `)+f;a.length>0&&(u+=`
4
4
 
5
5
  ---
6
6
  ## Structured Data (JSON-LD)
7
7
  \`\`\`json
8
- `,c.forEach(d=>{g+=JSON.stringify(d,null,2)+`
9
- `}),g+="```\n");let S=g.length,m=n>0?(n-S)/n*100:0;return{markdown:g,metadata:{title:r,description:a,jsonLd:c},stats:{originalHtmlSize:n,markdownSize:S,tokenReductionRatio:m}}}function C(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let n of t.routes){let o=`${t.baseUrl}${n.path}`;e.push(`- [${n.path}](${o}): ${n.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let n of t.externalLinks)n.description?e.push(`- [${n.title}](${n.url}): ${n.description}`):e.push(`- [${n.title}](${n.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let n of t.sections)e.push(`## ${n.heading}`),e.push(""),e.push(n.content),e.push("");return e.join(`
8
+ `,a.forEach(g=>{u+=JSON.stringify(g,null,2)+`
9
+ `}),u+="```\n");let y=u.length,c=n>0?(n-y)/n*100:0;return{markdown:u,metadata:{title:r,description:l,jsonLd:a},stats:{originalHtmlSize:n,markdownSize:y,tokenReductionRatio:c}}}function C(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let n of t.routes){let o=`${t.baseUrl}${n.path}`;e.push(`- [${n.path}](${o}): ${n.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let n of t.externalLinks)n.description?e.push(`- [${n.title}](${n.url}): ${n.description}`):e.push(`- [${n.title}](${n.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let n of t.sections)e.push(`## ${n.heading}`),e.push(""),e.push(n.content),e.push("");return e.join(`
10
10
  `).trim()+`
11
- `}var R=require("url");async function D(){let t=process.cwd(),e=l.default.resolve(t,"onto.config.ts"),n=l.default.resolve(t,"onto.config.js"),o=async a=>{try{let c=await import((0,R.pathToFileURL)(a).href);return c.default||c}catch{return null}},r=await o(e)||await o(n);if(r)return r;try{let a=i.default.existsSync(e)?e:i.default.existsSync(n)?n:null;if(!a)return null;let c=i.default.readFileSync(a,"utf8"),u=c.match(/name\s*:\s*['"`](.*)['"`]/),f=c.match(/summary\s*:\s*['"`](.*)['"`]/),w=c.match(/baseUrl\s*:\s*['"`](.*)['"`]/);if(u)return{name:u[1],summary:f?f[1]:"",baseUrl:w?w[1]:"",routes:[]}}catch{}return null}function K(){let t=l.default.join(process.cwd(),".env.local");i.default.existsSync(t)&&i.default.readFileSync(t,"utf8").split(/\r?\n/).forEach(n=>{let o=n.trim();if(!o||o.startsWith("#"))return;let[r,...a]=o.split("=");r&&a.length>0&&(process.env[r.trim()]=a.join("=").trim().replace(/^["']|["']$/g,""))})}async function H(){let t=process.cwd(),e=l.default.join(t,"onto.config.ts"),n=l.default.join(t,"middleware.ts");console.log(s.default.cyan(`
11
+ `}var R=require("url");async function D(){let t=process.cwd(),e=m.default.resolve(t,"onto.config.ts"),n=m.default.resolve(t,"onto.config.js"),o=async l=>{try{let a=await import((0,R.pathToFileURL)(l).href);return a.default||a}catch{return null}},r=await o(e)||await o(n);if(r)return r;try{let l=i.default.existsSync(e)?e:i.default.existsSync(n)?n:null;if(!l)return null;let a=i.default.readFileSync(l,"utf8"),d=a.match(/name\s*:\s*['"`](.*)['"`]/),f=a.match(/summary\s*:\s*['"`](.*)['"`]/),S=a.match(/baseUrl\s*:\s*['"`](.*)['"`]/),u=[],y=/path\s*:\s*['"`](.*?)['"`]\s*,\s*description\s*:\s*['"`](.*?)['"`]/g,c;for(;(c=y.exec(a))!==null;)u.push({path:c[1],description:c[2]});if(d)return{name:d[1],summary:f?f[1]:"",baseUrl:S?S[1]:"",routes:u}}catch{}return null}function K(){let t=m.default.join(process.cwd(),".env.local");i.default.existsSync(t)&&i.default.readFileSync(t,"utf8").split(/\r?\n/).forEach(n=>{let o=n.trim();if(!o||o.startsWith("#"))return;let[r,...l]=o.split("=");r&&l.length>0&&(process.env[r.trim()]=l.join("=").trim().replace(/^["']|["']$/g,""))})}async function H(){let t=process.cwd(),e=m.default.join(t,"onto.config.ts"),n=m.default.join(t,"middleware.ts");console.log(s.default.cyan(`
12
12
  [Onto] Initializing project...`)),i.default.existsSync(e)?console.log(s.default.yellow("\u2139 onto.config.ts already exists, skipping.")):(i.default.writeFileSync(e,`import { OntoConfig } from '@ontosdk/next';
13
13
 
14
14
  const config: OntoConfig = {
@@ -46,6 +46,6 @@ export const config = {
46
46
  `,"utf8"),console.log(s.default.green("\u2713 Created")+" middleware.ts")),console.log(s.default.magenta(`
47
47
  Initialization complete! \u{1F680}`)),console.log(s.default.dim("Next steps:")),console.log(s.default.dim("1. Update your routes in onto.config.ts")),console.log(s.default.dim(`2. Run "npm run build" to generate manifests
48
48
  `))}async function I(){if(process.argv.slice(2)[0]==="init"){await H();return}K(),console.log(s.default.cyan(`
49
- [Onto] Starting Semantic Output Generation...`));let n=process.cwd(),o=l.default.join(n,".next/server/app"),r=l.default.join(n,"public/.onto");if(!i.default.existsSync(o)){console.log(s.default.yellow(`[Onto] Could not find Next.js app output at ${o}`)),console.log(s.default.yellow('[Onto] Ensure this is run after "next build" and you are using the App Router.'));return}let a=await(0,T.glob)("**/*.html",{cwd:o});if(a.length===0){console.log(s.default.yellow("[Onto] No static HTML files found to process."));return}i.default.existsSync(r)||i.default.mkdirSync(r,{recursive:!0});let c=0,u=0,f=0;for(let m of a){let d=l.default.join(o,m),p=m.replace(/\.html$/,".md"),h=l.default.join(r,p);try{let y=i.default.readFileSync(d,"utf8"),x=j(y,`/${p.replace(/\.md$/,"")}`),k=l.default.dirname(h);i.default.existsSync(k)||i.default.mkdirSync(k,{recursive:!0}),i.default.writeFileSync(h,x.markdown,"utf8"),c+=x.stats.originalHtmlSize,u+=x.stats.markdownSize,f++;let F=(x.stats.originalHtmlSize/1024).toFixed(1),z=(x.stats.markdownSize/1024).toFixed(1),$=m.replace(/\.html$/,"");$==="index"?$="/":$=`/${$}`,console.log(s.default.green("\u2713 Optimized")+s.default.dim(` ${$} `)+s.default.blue(`[${F}KB -> ${z}KB]`))}catch(y){console.error(s.default.red(`\u2717 Failed to process ${m}: ${y.message}`))}}console.log(s.default.bold(s.default.magenta(`Processed ${f} pages. Total Size: ${(c/1024).toFixed(1)}KB -> ${(u/1024).toFixed(1)}KB`)));let w=process.env.ONTO_API_KEY,g=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(w&&f>0){console.log(s.default.cyan(`[Onto] Syncing manifest with Control Plane [${g}]...`));try{let m=a.map(p=>{let h=p.replace(/\.html$/,""),y=h==="index"?"/":`/${h}`,x=l.default.join(r,p.replace(/\.html$/,".md"));return{route:y,filename:`${h}.md`,content:i.default.readFileSync(x,"utf8")}}),d=await fetch(`${g}/api/files`,{method:"POST",headers:{"x-onto-key":w,"Content-Type":"application/json"},body:JSON.stringify({files:m})});if(d.ok)console.log(s.default.green("\u2713 Control Plane sync successful"));else{let p=await d.json().catch(()=>({}));console.log(s.default.yellow(`\u26A0 Control Plane sync skipped: ${p.error||d.statusText}`))}}catch(m){console.log(s.default.yellow(`\u26A0 Control Plane sync failed: ${m.message}`))}}let S=await D();if(S){let m=C(S),d=l.default.join(n,"public/llms.txt"),p=l.default.join(n,"public");i.default.existsSync(p)||i.default.mkdirSync(p,{recursive:!0}),i.default.writeFileSync(d,m,"utf8"),console.log(s.default.green("\u2713 Generated")+s.default.dim(" /llms.txt"))}console.log(s.default.dim(`Edge payloads are ready at /public/.onto/*
49
+ [Onto] Starting Semantic Output Generation...`));let n=process.cwd(),o=m.default.join(n,".next/server/app"),r=m.default.join(n,"public/.onto");if(!i.default.existsSync(o)){console.log(s.default.yellow(`[Onto] Could not find Next.js app output at ${o}`)),console.log(s.default.yellow('[Onto] Ensure this is run after "next build" and you are using the App Router.'));return}let l=await(0,T.glob)("**/*.html",{cwd:o});if(l.length===0){console.log(s.default.yellow("[Onto] No static HTML files found to process."));return}i.default.existsSync(r)||i.default.mkdirSync(r,{recursive:!0});let a=0,d=0,f=0;for(let c of l){let g=m.default.join(o,c),p=c.replace(/\.html$/,".md"),h=m.default.join(r,p);try{let x=i.default.readFileSync(g,"utf8"),w=j(x,`/${p.replace(/\.md$/,"")}`),k=m.default.dirname(h);i.default.existsSync(k)||i.default.mkdirSync(k,{recursive:!0}),i.default.writeFileSync(h,w.markdown,"utf8"),a+=w.stats.originalHtmlSize,d+=w.stats.markdownSize,f++;let F=(w.stats.originalHtmlSize/1024).toFixed(1),z=(w.stats.markdownSize/1024).toFixed(1),$=c.replace(/\.html$/,"");$==="index"?$="/":$=`/${$}`,console.log(s.default.green("\u2713 Optimized")+s.default.dim(` ${$} `)+s.default.blue(`[${F}KB -> ${z}KB]`))}catch(x){console.error(s.default.red(`\u2717 Failed to process ${c}: ${x.message}`))}}console.log(s.default.bold(s.default.magenta(`Processed ${f} pages. Total Size: ${(a/1024).toFixed(1)}KB -> ${(d/1024).toFixed(1)}KB`)));let S=process.env.ONTO_API_KEY,u=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(S&&f>0){console.log(s.default.cyan(`[Onto] Syncing manifest with Control Plane [${u}]...`));try{let c=l.map(p=>{let h=p.replace(/\.html$/,""),x=h==="index"?"/":`/${h}`,w=m.default.join(r,p.replace(/\.html$/,".md"));return{route:x,filename:`${h}.md`,content:i.default.readFileSync(w,"utf8")}}),g=await fetch(`${u}/api/files`,{method:"POST",headers:{"x-onto-key":S,"Content-Type":"application/json"},body:JSON.stringify({files:c})});if(g.ok)console.log(s.default.green("\u2713 Control Plane sync successful"));else{let p=await g.json().catch(()=>({}));console.log(s.default.yellow(`\u26A0 Control Plane sync skipped: ${p.error||g.statusText}`))}}catch(c){console.log(s.default.yellow(`\u26A0 Control Plane sync failed: ${c.message}`))}}let y=await D();if(y){let c=C(y),g=m.default.join(n,"public/llms.txt"),p=m.default.join(n,"public");i.default.existsSync(p)||i.default.mkdirSync(p,{recursive:!0}),i.default.writeFileSync(g,c,"utf8"),console.log(s.default.green("\u2713 Generated")+s.default.dim(" /llms.txt"))}console.log(s.default.dim(`Edge payloads are ready at /public/.onto/*
50
50
  `))}I().catch(t=>{console.error(s.default.red(`[Onto] Fatal Error: ${t.message}`)),process.exit(1)});
51
51
  //# sourceMappingURL=cli.js.map
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts","../src/extractor.ts","../src/config.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nimport { pathToFileURL } from 'url';\r\n\r\nasync function loadOntoConfig(): Promise<OntoConfig | null> {\r\n const cwd = process.cwd();\r\n const configPathTs = path.resolve(cwd, 'onto.config.ts');\r\n const configPathJs = path.resolve(cwd, 'onto.config.js');\r\n\r\n const tryImport = async (p: string) => {\r\n try {\r\n const config = await import(pathToFileURL(p).href);\r\n return config.default || config;\r\n } catch (e) {\r\n return null;\r\n }\r\n };\r\n\r\n // 1. Try ESM Import\r\n let config = await tryImport(configPathTs) || await tryImport(configPathJs);\r\n if (config) return config;\r\n\r\n // 2. Fallback: Manual Parsing (Robust for environments without TS loader)\r\n try {\r\n const filePath = fs.existsSync(configPathTs) ? configPathTs : (fs.existsSync(configPathJs) ? configPathJs : null);\r\n if (!filePath) return null;\r\n\r\n const content = fs.readFileSync(filePath, 'utf8');\r\n \r\n // Simple regex extraction for name and summary\r\n const nameMatch = content.match(/name\\s*:\\s*['\"`](.*)['\"`]/);\r\n const summaryMatch = content.match(/summary\\s*:\\s*['\"`](.*)['\"`]/);\r\n const baseUrlMatch = content.match(/baseUrl\\s*:\\s*['\"`](.*)['\"`]/);\r\n\r\n if (nameMatch) {\r\n return {\r\n name: nameMatch[1],\r\n summary: summaryMatch ? summaryMatch[1] : '',\r\n baseUrl: baseUrlMatch ? baseUrlMatch[1] : '',\r\n routes: [] // We skip routes in manual fallback for simplicity\r\n } as OntoConfig;\r\n }\r\n } catch (e) {\r\n // Fallback failed\r\n }\r\n\r\n return null;\r\n}\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function init() {\r\n const cwd = process.cwd();\r\n const configPath = path.join(cwd, 'onto.config.ts');\r\n const middlewarePath = path.join(cwd, 'middleware.ts');\r\n\r\n console.log(pc.cyan('\\n[Onto] Initializing project...'));\r\n\r\n // 1. Create onto.config.ts\r\n if (!fs.existsSync(configPath)) {\r\n const configTemplate = `import { OntoConfig } from '@ontosdk/next';\r\n\r\nconst config: OntoConfig = {\r\n name: 'My Project',\r\n summary: 'A short description of my project for AI agents.',\r\n baseUrl: 'https://example.com',\r\n routes: [\r\n { \r\n path: '/', \r\n description: 'The homepage of my application.',\r\n pageType: 'about'\r\n }\r\n ]\r\n};\r\n\r\nexport default config;\r\n`;\r\n fs.writeFileSync(configPath, configTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' onto.config.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ onto.config.ts already exists, skipping.'));\r\n }\r\n\r\n // 2. Create middleware.ts\r\n if (!fs.existsSync(middlewarePath)) {\r\n const middlewareTemplate = `import { NextRequest } from 'next/server';\r\nimport { ontoMiddleware } from '@ontosdk/next/middleware';\r\nimport ontoConfig from './onto.config';\r\n\r\nexport const middleware = (req: NextRequest) => ontoMiddleware(req, ontoConfig);\r\n\r\nexport const config = {\r\n matcher: [\r\n /*\r\n * Match all request paths except for the ones starting with:\r\n * - api (API routes)\r\n * - _next/static (static files)\r\n * - _next/image (image optimization files)\r\n * - favicon.ico, sitemap.xml, robots.txt (metadata files)\r\n */\r\n '/((?!api|_next/static|_next/image|favicon.ico|sitemap.xml|robots.txt).*)',\r\n ],\r\n};\r\n`;\r\n fs.writeFileSync(middlewarePath, middlewareTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' middleware.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ middleware.ts already exists, skipping.'));\r\n }\r\n\r\n console.log(pc.magenta('\\nInitialization complete! 🚀'));\r\n console.log(pc.dim('Next steps:'));\r\n console.log(pc.dim('1. Update your routes in onto.config.ts'));\r\n console.log(pc.dim('2. Run \"npm run build\" to generate manifests\\n'));\r\n}\r\n\r\nasync function main() {\r\n const args = process.argv.slice(2);\r\n const command = args[0];\r\n\r\n if (command === 'init') {\r\n await init();\r\n return;\r\n }\r\n\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n // ... rest of the existing main function logic ...\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n // --- Generate llms.txt manifest ---\r\n const config = await loadOntoConfig();\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const llmsTxtPath = path.join(cwd, 'public/llms.txt');\r\n \r\n // Ensure public dir exists\r\n const publicDir = path.join(cwd, 'public');\r\n if (!fs.existsSync(publicDir)) {\r\n fs.mkdirSync(publicDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(llmsTxtPath, llmsTxtContent, 'utf8');\r\n console.log(pc.green('✓ Generated') + pc.dim(' /llms.txt'));\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":";wdACA,IAAAA,EAAqB,gBACrBC,EAAe,iBACfC,EAAiB,mBACjBC,EAAe,yBCJf,IAAAC,EAAyB,sBACzBC,EAA4B,uBAEtBC,EAAkB,IAAI,EAAAC,QAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASC,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWf,EAAgB,SAASc,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CF9HA,IAAAK,EAA8B,eAE9B,eAAeC,GAA6C,CACxD,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAe,EAAAC,QAAK,QAAQF,EAAK,gBAAgB,EACjDG,EAAe,EAAAD,QAAK,QAAQF,EAAK,gBAAgB,EAEjDI,EAAY,MAAOC,GAAc,CACnC,GAAI,CACA,IAAMC,EAAS,MAAM,UAAO,iBAAcD,CAAC,EAAE,MAC7C,OAAOC,EAAO,SAAWA,CAC7B,MAAY,CACR,OAAO,IACX,CACJ,EAGIA,EAAS,MAAMF,EAAUH,CAAY,GAAK,MAAMG,EAAUD,CAAY,EAC1E,GAAIG,EAAQ,OAAOA,EAGnB,GAAI,CACA,IAAMC,EAAW,EAAAC,QAAG,WAAWP,CAAY,EAAIA,EAAgB,EAAAO,QAAG,WAAWL,CAAY,EAAIA,EAAe,KAC5G,GAAI,CAACI,EAAU,OAAO,KAEtB,IAAME,EAAU,EAAAD,QAAG,aAAaD,EAAU,MAAM,EAG1CG,EAAYD,EAAQ,MAAM,2BAA2B,EACrDE,EAAeF,EAAQ,MAAM,8BAA8B,EAC3DG,EAAeH,EAAQ,MAAM,8BAA8B,EAEjE,GAAIC,EACA,MAAO,CACH,KAAMA,EAAU,CAAC,EACjB,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,OAAQ,CAAC,CACb,CAER,MAAY,CAEZ,CAEA,OAAO,IACX,CAGA,SAASC,GAAU,CACf,IAAMC,EAAU,EAAAZ,QAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjD,EAAAM,QAAG,WAAWM,CAAO,GACF,EAAAN,QAAG,aAAaM,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQC,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClB,IAAMnB,EAAM,QAAQ,IAAI,EAClBoB,EAAa,EAAAlB,QAAK,KAAKF,EAAK,gBAAgB,EAC5CqB,EAAiB,EAAAnB,QAAK,KAAKF,EAAK,eAAe,EAErD,QAAQ,IAAI,EAAAsB,QAAG,KAAK;AAAA,+BAAkC,CAAC,EAGlD,EAAAd,QAAG,WAAWY,CAAU,EAqBzB,QAAQ,IAAI,EAAAE,QAAG,OAAO,iDAA4C,CAAC,GAHnE,EAAAd,QAAG,cAAcY,EAjBM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBsB,MAAM,EACnD,QAAQ,IAAI,EAAAE,QAAG,MAAM,gBAAW,EAAI,iBAAiB,GAMpD,EAAAd,QAAG,WAAWa,CAAc,EAuB7B,QAAQ,IAAI,EAAAC,QAAG,OAAO,gDAA2C,CAAC,GAHlE,EAAAd,QAAG,cAAca,EAnBU;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmB0B,MAAM,EAC3D,QAAQ,IAAI,EAAAC,QAAG,MAAM,gBAAW,EAAI,gBAAgB,GAKxD,QAAQ,IAAI,EAAAA,QAAG,QAAQ;AAAA,mCAA+B,CAAC,EACvD,QAAQ,IAAI,EAAAA,QAAG,IAAI,aAAa,CAAC,EACjC,QAAQ,IAAI,EAAAA,QAAG,IAAI,yCAAyC,CAAC,EAC7D,QAAQ,IAAI,EAAAA,QAAG,IAAI;AAAA,CAAgD,CAAC,CACxE,CAEA,eAAeC,GAAO,CAIlB,GAHa,QAAQ,KAAK,MAAM,CAAC,EACZ,CAAC,IAEN,OAAQ,CACpB,MAAMJ,EAAK,EACX,MACJ,CAEAN,EAAQ,EACR,QAAQ,IAAI,EAAAS,QAAG,KAAK;AAAA,8CAAiD,CAAC,EAGtE,IAAMtB,EAAM,QAAQ,IAAI,EAClBwB,EAAgB,EAAAtB,QAAK,KAAKF,EAAK,kBAAkB,EACjDyB,EAAgB,EAAAvB,QAAK,KAAKF,EAAK,cAAc,EAEnD,GAAI,CAAC,EAAAQ,QAAG,WAAWgB,CAAa,EAAG,CAC/B,QAAQ,IAAI,EAAAF,QAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAI,EAAAF,QAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,QAAM,QAAK,YAAa,CAAE,IAAKF,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAI,EAAAJ,QAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGK,EAAAd,QAAG,WAAWiB,CAAa,GAC5B,EAAAjB,QAAG,UAAUiB,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIE,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQJ,EAAO,CACtB,IAAMK,EAAY,EAAA7B,QAAK,KAAKsB,EAAeM,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAa,EAAA/B,QAAK,KAAKuB,EAAeO,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAc,EAAA1B,QAAG,aAAauB,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAY,EAAAnC,QAAK,QAAQ+B,CAAU,EACpC,EAAAzB,QAAG,WAAW6B,CAAS,GACxB,EAAA7B,QAAG,UAAU6B,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C,EAAA7B,QAAG,cAAcyB,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJ,EAAAlB,QAAG,MAAM,kBAAa,EACtB,EAAAA,QAAG,IAAI,IAAIkB,CAAS,GAAG,EACvB,EAAAlB,QAAG,KAAK,IAAIgB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAM,EAAAnB,QAAG,IAAI,4BAAuBQ,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJ,EAAAnB,QAAG,KACC,EAAAA,QAAG,QAAQ,aAAaO,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAI,EAAAP,QAAG,KAAK,+CAA+CqB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWlB,EAAM,IAAII,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAAS,EAAA5C,QAAK,KAAKuB,EAAeK,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAAS,EAAAhC,QAAG,aAAasC,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI,EAAAzB,QAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM0B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI,EAAAzB,QAAG,OAAO,sCAAiC0B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAI,EAAAnB,QAAG,OAAO,qCAAgCmB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAGA,IAAMnC,EAAS,MAAMP,EAAe,EACpC,GAAIO,EAAQ,CACR,IAAM2C,EAAiBC,EAAgB5C,CAAM,EACvC6C,EAAc,EAAAjD,QAAK,KAAKF,EAAK,iBAAiB,EAG9CoD,EAAY,EAAAlD,QAAK,KAAKF,EAAK,QAAQ,EACpC,EAAAQ,QAAG,WAAW4C,CAAS,GACxB,EAAA5C,QAAG,UAAU4C,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C,EAAA5C,QAAG,cAAc2C,EAAaF,EAAgB,MAAM,EACpD,QAAQ,IAAI,EAAA3B,QAAG,MAAM,kBAAa,EAAI,EAAAA,QAAG,IAAI,YAAY,CAAC,CAC9D,CAEA,QAAQ,IAAI,EAAAA,QAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAC,EAAK,EAAE,MAAMkB,GAAK,CACd,QAAQ,MAAM,EAAAnB,QAAG,IAAI,uBAAuBmB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["import_glob","import_fs","import_path","import_picocolors","cheerio","import_turndown","turndownService","TurndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","import_url","loadOntoConfig","cwd","configPathTs","path","configPathJs","tryImport","p","config","filePath","fs","content","nameMatch","summaryMatch","baseUrlMatch","loadEnv","envPath","line","trimmedLine","key","valueParts","init","configPath","middlewarePath","pc","main","nextAppDirDir","ontoPublicDir","files","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData","llmsTxtContent","generateLlmsTxt","llmsTxtPath","publicDir"]}
1
+ {"version":3,"sources":["../src/cli.ts","../src/extractor.ts","../src/config.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nimport { pathToFileURL } from 'url';\r\n\r\nasync function loadOntoConfig(): Promise<OntoConfig | null> {\r\n const cwd = process.cwd();\r\n const configPathTs = path.resolve(cwd, 'onto.config.ts');\r\n const configPathJs = path.resolve(cwd, 'onto.config.js');\r\n\r\n const tryImport = async (p: string) => {\r\n try {\r\n const config = await import(pathToFileURL(p).href);\r\n return config.default || config;\r\n } catch (e) {\r\n return null;\r\n }\r\n };\r\n\r\n // 1. Try ESM Import\r\n let config = await tryImport(configPathTs) || await tryImport(configPathJs);\r\n if (config) return config;\r\n\r\n // 2. Fallback: Manual Parsing (Robust for environments without TS loader)\r\n try {\r\n const filePath = fs.existsSync(configPathTs) ? configPathTs : (fs.existsSync(configPathJs) ? configPathJs : null);\r\n if (!filePath) return null;\r\n\r\n const content = fs.readFileSync(filePath, 'utf8');\r\n \r\n // Simple regex extraction for name and summary\r\n const nameMatch = content.match(/name\\s*:\\s*['\"`](.*)['\"`]/);\r\n const summaryMatch = content.match(/summary\\s*:\\s*['\"`](.*)['\"`]/);\r\n const baseUrlMatch = content.match(/baseUrl\\s*:\\s*['\"`](.*)['\"`]/);\r\n\r\n // Basic route extraction\r\n const routes: any[] = [];\r\n const routeRegex = /path\\s*:\\s*['\"`](.*?)['\"`]\\s*,\\s*description\\s*:\\s*['\"`](.*?)['\"`]/g;\r\n let match;\r\n while ((match = routeRegex.exec(content)) !== null) {\r\n routes.push({ path: match[1], description: match[2] });\r\n }\r\n\r\n if (nameMatch) {\r\n return {\r\n name: nameMatch[1],\r\n summary: summaryMatch ? summaryMatch[1] : '',\r\n baseUrl: baseUrlMatch ? baseUrlMatch[1] : '',\r\n routes: routes\r\n } as OntoConfig;\r\n }\r\n } catch (e) {\r\n // Fallback failed\r\n }\r\n\r\n return null;\r\n}\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function init() {\r\n const cwd = process.cwd();\r\n const configPath = path.join(cwd, 'onto.config.ts');\r\n const middlewarePath = path.join(cwd, 'middleware.ts');\r\n\r\n console.log(pc.cyan('\\n[Onto] Initializing project...'));\r\n\r\n // 1. Create onto.config.ts\r\n if (!fs.existsSync(configPath)) {\r\n const configTemplate = `import { OntoConfig } from '@ontosdk/next';\r\n\r\nconst config: OntoConfig = {\r\n name: 'My Project',\r\n summary: 'A short description of my project for AI agents.',\r\n baseUrl: 'https://example.com',\r\n routes: [\r\n { \r\n path: '/', \r\n description: 'The homepage of my application.',\r\n pageType: 'about'\r\n }\r\n ]\r\n};\r\n\r\nexport default config;\r\n`;\r\n fs.writeFileSync(configPath, configTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' onto.config.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ onto.config.ts already exists, skipping.'));\r\n }\r\n\r\n // 2. Create middleware.ts\r\n if (!fs.existsSync(middlewarePath)) {\r\n const middlewareTemplate = `import { NextRequest } from 'next/server';\r\nimport { ontoMiddleware } from '@ontosdk/next/middleware';\r\nimport ontoConfig from './onto.config';\r\n\r\nexport const middleware = (req: NextRequest) => ontoMiddleware(req, ontoConfig);\r\n\r\nexport const config = {\r\n matcher: [\r\n /*\r\n * Match all request paths except for the ones starting with:\r\n * - api (API routes)\r\n * - _next/static (static files)\r\n * - _next/image (image optimization files)\r\n * - favicon.ico, sitemap.xml, robots.txt (metadata files)\r\n */\r\n '/((?!api|_next/static|_next/image|favicon.ico|sitemap.xml|robots.txt).*)',\r\n ],\r\n};\r\n`;\r\n fs.writeFileSync(middlewarePath, middlewareTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' middleware.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ middleware.ts already exists, skipping.'));\r\n }\r\n\r\n console.log(pc.magenta('\\nInitialization complete! 🚀'));\r\n console.log(pc.dim('Next steps:'));\r\n console.log(pc.dim('1. Update your routes in onto.config.ts'));\r\n console.log(pc.dim('2. Run \"npm run build\" to generate manifests\\n'));\r\n}\r\n\r\nasync function main() {\r\n const args = process.argv.slice(2);\r\n const command = args[0];\r\n\r\n if (command === 'init') {\r\n await init();\r\n return;\r\n }\r\n\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n // ... rest of the existing main function logic ...\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n // --- Generate llms.txt manifest ---\r\n const config = await loadOntoConfig();\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const llmsTxtPath = path.join(cwd, 'public/llms.txt');\r\n \r\n // Ensure public dir exists\r\n const publicDir = path.join(cwd, 'public');\r\n if (!fs.existsSync(publicDir)) {\r\n fs.mkdirSync(publicDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(llmsTxtPath, llmsTxtContent, 'utf8');\r\n console.log(pc.green('✓ Generated') + pc.dim(' /llms.txt'));\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":";wdACA,IAAAA,EAAqB,gBACrBC,EAAe,iBACfC,EAAiB,mBACjBC,EAAe,yBCJf,IAAAC,EAAyB,sBACzBC,EAA4B,uBAEtBC,EAAkB,IAAI,EAAAC,QAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASC,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWf,EAAgB,SAASc,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CF9HA,IAAAK,EAA8B,eAE9B,eAAeC,GAA6C,CACxD,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAe,EAAAC,QAAK,QAAQF,EAAK,gBAAgB,EACjDG,EAAe,EAAAD,QAAK,QAAQF,EAAK,gBAAgB,EAEjDI,EAAY,MAAOC,GAAc,CACnC,GAAI,CACA,IAAMC,EAAS,MAAM,UAAO,iBAAcD,CAAC,EAAE,MAC7C,OAAOC,EAAO,SAAWA,CAC7B,MAAY,CACR,OAAO,IACX,CACJ,EAGIA,EAAS,MAAMF,EAAUH,CAAY,GAAK,MAAMG,EAAUD,CAAY,EAC1E,GAAIG,EAAQ,OAAOA,EAGnB,GAAI,CACA,IAAMC,EAAW,EAAAC,QAAG,WAAWP,CAAY,EAAIA,EAAgB,EAAAO,QAAG,WAAWL,CAAY,EAAIA,EAAe,KAC5G,GAAI,CAACI,EAAU,OAAO,KAEtB,IAAME,EAAU,EAAAD,QAAG,aAAaD,EAAU,MAAM,EAG1CG,EAAYD,EAAQ,MAAM,2BAA2B,EACrDE,EAAeF,EAAQ,MAAM,8BAA8B,EAC3DG,EAAeH,EAAQ,MAAM,8BAA8B,EAG3DI,EAAgB,CAAC,EACjBC,EAAa,sEACfC,EACJ,MAAQA,EAAQD,EAAW,KAAKL,CAAO,KAAO,MAC1CI,EAAO,KAAK,CAAE,KAAME,EAAM,CAAC,EAAG,YAAaA,EAAM,CAAC,CAAE,CAAC,EAGzD,GAAIL,EACA,MAAO,CACH,KAAMA,EAAU,CAAC,EACjB,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,OAAQC,CACZ,CAER,MAAY,CAEZ,CAEA,OAAO,IACX,CAGA,SAASG,GAAU,CACf,IAAMC,EAAU,EAAAf,QAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjD,EAAAM,QAAG,WAAWS,CAAO,GACF,EAAAT,QAAG,aAAaS,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQC,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClB,IAAMtB,EAAM,QAAQ,IAAI,EAClBuB,EAAa,EAAArB,QAAK,KAAKF,EAAK,gBAAgB,EAC5CwB,EAAiB,EAAAtB,QAAK,KAAKF,EAAK,eAAe,EAErD,QAAQ,IAAI,EAAAyB,QAAG,KAAK;AAAA,+BAAkC,CAAC,EAGlD,EAAAjB,QAAG,WAAWe,CAAU,EAqBzB,QAAQ,IAAI,EAAAE,QAAG,OAAO,iDAA4C,CAAC,GAHnE,EAAAjB,QAAG,cAAce,EAjBM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBsB,MAAM,EACnD,QAAQ,IAAI,EAAAE,QAAG,MAAM,gBAAW,EAAI,iBAAiB,GAMpD,EAAAjB,QAAG,WAAWgB,CAAc,EAuB7B,QAAQ,IAAI,EAAAC,QAAG,OAAO,gDAA2C,CAAC,GAHlE,EAAAjB,QAAG,cAAcgB,EAnBU;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmB0B,MAAM,EAC3D,QAAQ,IAAI,EAAAC,QAAG,MAAM,gBAAW,EAAI,gBAAgB,GAKxD,QAAQ,IAAI,EAAAA,QAAG,QAAQ;AAAA,mCAA+B,CAAC,EACvD,QAAQ,IAAI,EAAAA,QAAG,IAAI,aAAa,CAAC,EACjC,QAAQ,IAAI,EAAAA,QAAG,IAAI,yCAAyC,CAAC,EAC7D,QAAQ,IAAI,EAAAA,QAAG,IAAI;AAAA,CAAgD,CAAC,CACxE,CAEA,eAAeC,GAAO,CAIlB,GAHa,QAAQ,KAAK,MAAM,CAAC,EACZ,CAAC,IAEN,OAAQ,CACpB,MAAMJ,EAAK,EACX,MACJ,CAEAN,EAAQ,EACR,QAAQ,IAAI,EAAAS,QAAG,KAAK;AAAA,8CAAiD,CAAC,EAGtE,IAAMzB,EAAM,QAAQ,IAAI,EAClB2B,EAAgB,EAAAzB,QAAK,KAAKF,EAAK,kBAAkB,EACjD4B,EAAgB,EAAA1B,QAAK,KAAKF,EAAK,cAAc,EAEnD,GAAI,CAAC,EAAAQ,QAAG,WAAWmB,CAAa,EAAG,CAC/B,QAAQ,IAAI,EAAAF,QAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAI,EAAAF,QAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,QAAM,QAAK,YAAa,CAAE,IAAKF,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAI,EAAAJ,QAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGK,EAAAjB,QAAG,WAAWoB,CAAa,GAC5B,EAAApB,QAAG,UAAUoB,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIE,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQJ,EAAO,CACtB,IAAMK,EAAY,EAAAhC,QAAK,KAAKyB,EAAeM,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAa,EAAAlC,QAAK,KAAK0B,EAAeO,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAc,EAAA7B,QAAG,aAAa0B,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAY,EAAAtC,QAAK,QAAQkC,CAAU,EACpC,EAAA5B,QAAG,WAAWgC,CAAS,GACxB,EAAAhC,QAAG,UAAUgC,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C,EAAAhC,QAAG,cAAc4B,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJ,EAAAlB,QAAG,MAAM,kBAAa,EACtB,EAAAA,QAAG,IAAI,IAAIkB,CAAS,GAAG,EACvB,EAAAlB,QAAG,KAAK,IAAIgB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAM,EAAAnB,QAAG,IAAI,4BAAuBQ,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJ,EAAAnB,QAAG,KACC,EAAAA,QAAG,QAAQ,aAAaO,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAI,EAAAP,QAAG,KAAK,+CAA+CqB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWlB,EAAM,IAAII,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAAS,EAAA/C,QAAK,KAAK0B,EAAeK,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAAS,EAAAnC,QAAG,aAAayC,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI,EAAAzB,QAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM0B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI,EAAAzB,QAAG,OAAO,sCAAiC0B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAI,EAAAnB,QAAG,OAAO,qCAAgCmB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAGA,IAAMtC,EAAS,MAAMP,EAAe,EACpC,GAAIO,EAAQ,CACR,IAAM8C,EAAiBC,EAAgB/C,CAAM,EACvCgD,EAAc,EAAApD,QAAK,KAAKF,EAAK,iBAAiB,EAG9CuD,EAAY,EAAArD,QAAK,KAAKF,EAAK,QAAQ,EACpC,EAAAQ,QAAG,WAAW+C,CAAS,GACxB,EAAA/C,QAAG,UAAU+C,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C,EAAA/C,QAAG,cAAc8C,EAAaF,EAAgB,MAAM,EACpD,QAAQ,IAAI,EAAA3B,QAAG,MAAM,kBAAa,EAAI,EAAAA,QAAG,IAAI,YAAY,CAAC,CAC9D,CAEA,QAAQ,IAAI,EAAAA,QAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAC,EAAK,EAAE,MAAMkB,GAAK,CACd,QAAQ,MAAM,EAAAnB,QAAG,IAAI,uBAAuBmB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["import_glob","import_fs","import_path","import_picocolors","cheerio","import_turndown","turndownService","TurndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","import_url","loadOntoConfig","cwd","configPathTs","path","configPathJs","tryImport","p","config","filePath","fs","content","nameMatch","summaryMatch","baseUrlMatch","routes","routeRegex","match","loadEnv","envPath","line","trimmedLine","key","valueParts","init","configPath","middlewarePath","pc","main","nextAppDirDir","ontoPublicDir","files","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData","llmsTxtContent","generateLlmsTxt","llmsTxtPath","publicDir"]}
package/dist/cli.mjs CHANGED
@@ -1,14 +1,14 @@
1
1
  #!/usr/bin/env node
2
- import{glob as R}from"glob";import i from"fs";import l from"path";import s from"picocolors";import*as k from"cheerio";import C from"turndown";var T=new C({headingStyle:"atx",codeBlockStyle:"fenced"});function P(n,e="Generated Output"){let t=n.length,o=k.load(n),c=o("title").text()||o("h1").first().text()||"Untitled Page",r=o('meta[name="description"]').attr("content")||"No description found.",a=[];o('script[type="application/ld+json"]').each((d,p)=>{try{let h=o(p).html()||"",y=JSON.parse(h);a.push(y)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let u="";o("main").length>0?u=o("main").html()||"":o("article").length>0?u=o("article").html()||"":u=o("body").html()||"";let f=T.turndown(u),g=[`# ${c}`,`> ${r}`,"",`**Source:** ${e}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
3
- `)+f;a.length>0&&(g+=`
2
+ import{glob as R}from"glob";import i from"fs";import m from"path";import s from"picocolors";import*as k from"cheerio";import C from"turndown";var T=new C({headingStyle:"atx",codeBlockStyle:"fenced"});function P(n,e="Generated Output"){let t=n.length,o=k.load(n),l=o("title").text()||o("h1").first().text()||"Untitled Page",c=o('meta[name="description"]').attr("content")||"No description found.",r=[];o('script[type="application/ld+json"]').each((g,p)=>{try{let h=o(p).html()||"",x=JSON.parse(h);r.push(x)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let d="";o("main").length>0?d=o("main").html()||"":o("article").length>0?d=o("article").html()||"":d=o("body").html()||"";let f=T.turndown(d),u=[`# ${l}`,`> ${c}`,"",`**Source:** ${e}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
3
+ `)+f;r.length>0&&(u+=`
4
4
 
5
5
  ---
6
6
  ## Structured Data (JSON-LD)
7
7
  \`\`\`json
8
- `,a.forEach(d=>{g+=JSON.stringify(d,null,2)+`
9
- `}),g+="```\n");let S=g.length,m=t>0?(t-S)/t*100:0;return{markdown:g,metadata:{title:c,description:r,jsonLd:a},stats:{originalHtmlSize:t,markdownSize:S,tokenReductionRatio:m}}}function v(n){let e=[];if(e.push(`# ${n.name}`),e.push(""),e.push(`> ${n.summary}`),e.push(""),n.routes&&n.routes.length>0){e.push("## Key Routes"),e.push("");for(let t of n.routes){let o=`${n.baseUrl}${t.path}`;e.push(`- [${t.path}](${o}): ${t.description}`)}e.push("")}if(n.externalLinks&&n.externalLinks.length>0){e.push("## Resources"),e.push("");for(let t of n.externalLinks)t.description?e.push(`- [${t.title}](${t.url}): ${t.description}`):e.push(`- [${t.title}](${t.url})`);e.push("")}if(n.sections&&n.sections.length>0)for(let t of n.sections)e.push(`## ${t.heading}`),e.push(""),e.push(t.content),e.push("");return e.join(`
8
+ `,r.forEach(g=>{u+=JSON.stringify(g,null,2)+`
9
+ `}),u+="```\n");let y=u.length,a=t>0?(t-y)/t*100:0;return{markdown:u,metadata:{title:l,description:c,jsonLd:r},stats:{originalHtmlSize:t,markdownSize:y,tokenReductionRatio:a}}}function v(n){let e=[];if(e.push(`# ${n.name}`),e.push(""),e.push(`> ${n.summary}`),e.push(""),n.routes&&n.routes.length>0){e.push("## Key Routes"),e.push("");for(let t of n.routes){let o=`${n.baseUrl}${t.path}`;e.push(`- [${t.path}](${o}): ${t.description}`)}e.push("")}if(n.externalLinks&&n.externalLinks.length>0){e.push("## Resources"),e.push("");for(let t of n.externalLinks)t.description?e.push(`- [${t.title}](${t.url}): ${t.description}`):e.push(`- [${t.title}](${t.url})`);e.push("")}if(n.sections&&n.sections.length>0)for(let t of n.sections)e.push(`## ${t.heading}`),e.push(""),e.push(t.content),e.push("");return e.join(`
10
10
  `).trim()+`
11
- `}import{pathToFileURL as F}from"url";async function z(){let n=process.cwd(),e=l.resolve(n,"onto.config.ts"),t=l.resolve(n,"onto.config.js"),o=async r=>{try{let a=await import(F(r).href);return a.default||a}catch{return null}},c=await o(e)||await o(t);if(c)return c;try{let r=i.existsSync(e)?e:i.existsSync(t)?t:null;if(!r)return null;let a=i.readFileSync(r,"utf8"),u=a.match(/name\s*:\s*['"`](.*)['"`]/),f=a.match(/summary\s*:\s*['"`](.*)['"`]/),w=a.match(/baseUrl\s*:\s*['"`](.*)['"`]/);if(u)return{name:u[1],summary:f?f[1]:"",baseUrl:w?w[1]:"",routes:[]}}catch{}return null}function L(){let n=l.join(process.cwd(),".env.local");i.existsSync(n)&&i.readFileSync(n,"utf8").split(/\r?\n/).forEach(t=>{let o=t.trim();if(!o||o.startsWith("#"))return;let[c,...r]=o.split("=");c&&r.length>0&&(process.env[c.trim()]=r.join("=").trim().replace(/^["']|["']$/g,""))})}async function N(){let n=process.cwd(),e=l.join(n,"onto.config.ts"),t=l.join(n,"middleware.ts");console.log(s.cyan(`
11
+ `}import{pathToFileURL as F}from"url";async function z(){let n=process.cwd(),e=m.resolve(n,"onto.config.ts"),t=m.resolve(n,"onto.config.js"),o=async c=>{try{let r=await import(F(c).href);return r.default||r}catch{return null}},l=await o(e)||await o(t);if(l)return l;try{let c=i.existsSync(e)?e:i.existsSync(t)?t:null;if(!c)return null;let r=i.readFileSync(c,"utf8"),d=r.match(/name\s*:\s*['"`](.*)['"`]/),f=r.match(/summary\s*:\s*['"`](.*)['"`]/),S=r.match(/baseUrl\s*:\s*['"`](.*)['"`]/),u=[],y=/path\s*:\s*['"`](.*?)['"`]\s*,\s*description\s*:\s*['"`](.*?)['"`]/g,a;for(;(a=y.exec(r))!==null;)u.push({path:a[1],description:a[2]});if(d)return{name:d[1],summary:f?f[1]:"",baseUrl:S?S[1]:"",routes:u}}catch{}return null}function L(){let n=m.join(process.cwd(),".env.local");i.existsSync(n)&&i.readFileSync(n,"utf8").split(/\r?\n/).forEach(t=>{let o=t.trim();if(!o||o.startsWith("#"))return;let[l,...c]=o.split("=");l&&c.length>0&&(process.env[l.trim()]=c.join("=").trim().replace(/^["']|["']$/g,""))})}async function N(){let n=process.cwd(),e=m.join(n,"onto.config.ts"),t=m.join(n,"middleware.ts");console.log(s.cyan(`
12
12
  [Onto] Initializing project...`)),i.existsSync(e)?console.log(s.yellow("\u2139 onto.config.ts already exists, skipping.")):(i.writeFileSync(e,`import { OntoConfig } from '@ontosdk/next';
13
13
 
14
14
  const config: OntoConfig = {
@@ -46,6 +46,6 @@ export const config = {
46
46
  `,"utf8"),console.log(s.green("\u2713 Created")+" middleware.ts")),console.log(s.magenta(`
47
47
  Initialization complete! \u{1F680}`)),console.log(s.dim("Next steps:")),console.log(s.dim("1. Update your routes in onto.config.ts")),console.log(s.dim(`2. Run "npm run build" to generate manifests
48
48
  `))}async function U(){if(process.argv.slice(2)[0]==="init"){await N();return}L(),console.log(s.cyan(`
49
- [Onto] Starting Semantic Output Generation...`));let t=process.cwd(),o=l.join(t,".next/server/app"),c=l.join(t,"public/.onto");if(!i.existsSync(o)){console.log(s.yellow(`[Onto] Could not find Next.js app output at ${o}`)),console.log(s.yellow('[Onto] Ensure this is run after "next build" and you are using the App Router.'));return}let r=await R("**/*.html",{cwd:o});if(r.length===0){console.log(s.yellow("[Onto] No static HTML files found to process."));return}i.existsSync(c)||i.mkdirSync(c,{recursive:!0});let a=0,u=0,f=0;for(let m of r){let d=l.join(o,m),p=m.replace(/\.html$/,".md"),h=l.join(c,p);try{let y=i.readFileSync(d,"utf8"),x=P(y,`/${p.replace(/\.md$/,"")}`),O=l.dirname(h);i.existsSync(O)||i.mkdirSync(O,{recursive:!0}),i.writeFileSync(h,x.markdown,"utf8"),a+=x.stats.originalHtmlSize,u+=x.stats.markdownSize,f++;let b=(x.stats.originalHtmlSize/1024).toFixed(1),j=(x.stats.markdownSize/1024).toFixed(1),$=m.replace(/\.html$/,"");$==="index"?$="/":$=`/${$}`,console.log(s.green("\u2713 Optimized")+s.dim(` ${$} `)+s.blue(`[${b}KB -> ${j}KB]`))}catch(y){console.error(s.red(`\u2717 Failed to process ${m}: ${y.message}`))}}console.log(s.bold(s.magenta(`Processed ${f} pages. Total Size: ${(a/1024).toFixed(1)}KB -> ${(u/1024).toFixed(1)}KB`)));let w=process.env.ONTO_API_KEY,g=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(w&&f>0){console.log(s.cyan(`[Onto] Syncing manifest with Control Plane [${g}]...`));try{let m=r.map(p=>{let h=p.replace(/\.html$/,""),y=h==="index"?"/":`/${h}`,x=l.join(c,p.replace(/\.html$/,".md"));return{route:y,filename:`${h}.md`,content:i.readFileSync(x,"utf8")}}),d=await fetch(`${g}/api/files`,{method:"POST",headers:{"x-onto-key":w,"Content-Type":"application/json"},body:JSON.stringify({files:m})});if(d.ok)console.log(s.green("\u2713 Control Plane sync successful"));else{let p=await d.json().catch(()=>({}));console.log(s.yellow(`\u26A0 Control Plane sync skipped: ${p.error||d.statusText}`))}}catch(m){console.log(s.yellow(`\u26A0 Control Plane sync failed: ${m.message}`))}}let S=await z();if(S){let m=v(S),d=l.join(t,"public/llms.txt"),p=l.join(t,"public");i.existsSync(p)||i.mkdirSync(p,{recursive:!0}),i.writeFileSync(d,m,"utf8"),console.log(s.green("\u2713 Generated")+s.dim(" /llms.txt"))}console.log(s.dim(`Edge payloads are ready at /public/.onto/*
49
+ [Onto] Starting Semantic Output Generation...`));let t=process.cwd(),o=m.join(t,".next/server/app"),l=m.join(t,"public/.onto");if(!i.existsSync(o)){console.log(s.yellow(`[Onto] Could not find Next.js app output at ${o}`)),console.log(s.yellow('[Onto] Ensure this is run after "next build" and you are using the App Router.'));return}let c=await R("**/*.html",{cwd:o});if(c.length===0){console.log(s.yellow("[Onto] No static HTML files found to process."));return}i.existsSync(l)||i.mkdirSync(l,{recursive:!0});let r=0,d=0,f=0;for(let a of c){let g=m.join(o,a),p=a.replace(/\.html$/,".md"),h=m.join(l,p);try{let x=i.readFileSync(g,"utf8"),w=P(x,`/${p.replace(/\.md$/,"")}`),O=m.dirname(h);i.existsSync(O)||i.mkdirSync(O,{recursive:!0}),i.writeFileSync(h,w.markdown,"utf8"),r+=w.stats.originalHtmlSize,d+=w.stats.markdownSize,f++;let b=(w.stats.originalHtmlSize/1024).toFixed(1),j=(w.stats.markdownSize/1024).toFixed(1),$=a.replace(/\.html$/,"");$==="index"?$="/":$=`/${$}`,console.log(s.green("\u2713 Optimized")+s.dim(` ${$} `)+s.blue(`[${b}KB -> ${j}KB]`))}catch(x){console.error(s.red(`\u2717 Failed to process ${a}: ${x.message}`))}}console.log(s.bold(s.magenta(`Processed ${f} pages. Total Size: ${(r/1024).toFixed(1)}KB -> ${(d/1024).toFixed(1)}KB`)));let S=process.env.ONTO_API_KEY,u=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(S&&f>0){console.log(s.cyan(`[Onto] Syncing manifest with Control Plane [${u}]...`));try{let a=c.map(p=>{let h=p.replace(/\.html$/,""),x=h==="index"?"/":`/${h}`,w=m.join(l,p.replace(/\.html$/,".md"));return{route:x,filename:`${h}.md`,content:i.readFileSync(w,"utf8")}}),g=await fetch(`${u}/api/files`,{method:"POST",headers:{"x-onto-key":S,"Content-Type":"application/json"},body:JSON.stringify({files:a})});if(g.ok)console.log(s.green("\u2713 Control Plane sync successful"));else{let p=await g.json().catch(()=>({}));console.log(s.yellow(`\u26A0 Control Plane sync skipped: ${p.error||g.statusText}`))}}catch(a){console.log(s.yellow(`\u26A0 Control Plane sync failed: ${a.message}`))}}let y=await z();if(y){let a=v(y),g=m.join(t,"public/llms.txt"),p=m.join(t,"public");i.existsSync(p)||i.mkdirSync(p,{recursive:!0}),i.writeFileSync(g,a,"utf8"),console.log(s.green("\u2713 Generated")+s.dim(" /llms.txt"))}console.log(s.dim(`Edge payloads are ready at /public/.onto/*
50
50
  `))}U().catch(n=>{console.error(s.red(`[Onto] Fatal Error: ${n.message}`)),process.exit(1)});
51
51
  //# sourceMappingURL=cli.mjs.map
package/dist/cli.mjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts","../src/extractor.ts","../src/config.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nimport { pathToFileURL } from 'url';\r\n\r\nasync function loadOntoConfig(): Promise<OntoConfig | null> {\r\n const cwd = process.cwd();\r\n const configPathTs = path.resolve(cwd, 'onto.config.ts');\r\n const configPathJs = path.resolve(cwd, 'onto.config.js');\r\n\r\n const tryImport = async (p: string) => {\r\n try {\r\n const config = await import(pathToFileURL(p).href);\r\n return config.default || config;\r\n } catch (e) {\r\n return null;\r\n }\r\n };\r\n\r\n // 1. Try ESM Import\r\n let config = await tryImport(configPathTs) || await tryImport(configPathJs);\r\n if (config) return config;\r\n\r\n // 2. Fallback: Manual Parsing (Robust for environments without TS loader)\r\n try {\r\n const filePath = fs.existsSync(configPathTs) ? configPathTs : (fs.existsSync(configPathJs) ? configPathJs : null);\r\n if (!filePath) return null;\r\n\r\n const content = fs.readFileSync(filePath, 'utf8');\r\n \r\n // Simple regex extraction for name and summary\r\n const nameMatch = content.match(/name\\s*:\\s*['\"`](.*)['\"`]/);\r\n const summaryMatch = content.match(/summary\\s*:\\s*['\"`](.*)['\"`]/);\r\n const baseUrlMatch = content.match(/baseUrl\\s*:\\s*['\"`](.*)['\"`]/);\r\n\r\n if (nameMatch) {\r\n return {\r\n name: nameMatch[1],\r\n summary: summaryMatch ? summaryMatch[1] : '',\r\n baseUrl: baseUrlMatch ? baseUrlMatch[1] : '',\r\n routes: [] // We skip routes in manual fallback for simplicity\r\n } as OntoConfig;\r\n }\r\n } catch (e) {\r\n // Fallback failed\r\n }\r\n\r\n return null;\r\n}\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function init() {\r\n const cwd = process.cwd();\r\n const configPath = path.join(cwd, 'onto.config.ts');\r\n const middlewarePath = path.join(cwd, 'middleware.ts');\r\n\r\n console.log(pc.cyan('\\n[Onto] Initializing project...'));\r\n\r\n // 1. Create onto.config.ts\r\n if (!fs.existsSync(configPath)) {\r\n const configTemplate = `import { OntoConfig } from '@ontosdk/next';\r\n\r\nconst config: OntoConfig = {\r\n name: 'My Project',\r\n summary: 'A short description of my project for AI agents.',\r\n baseUrl: 'https://example.com',\r\n routes: [\r\n { \r\n path: '/', \r\n description: 'The homepage of my application.',\r\n pageType: 'about'\r\n }\r\n ]\r\n};\r\n\r\nexport default config;\r\n`;\r\n fs.writeFileSync(configPath, configTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' onto.config.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ onto.config.ts already exists, skipping.'));\r\n }\r\n\r\n // 2. Create middleware.ts\r\n if (!fs.existsSync(middlewarePath)) {\r\n const middlewareTemplate = `import { NextRequest } from 'next/server';\r\nimport { ontoMiddleware } from '@ontosdk/next/middleware';\r\nimport ontoConfig from './onto.config';\r\n\r\nexport const middleware = (req: NextRequest) => ontoMiddleware(req, ontoConfig);\r\n\r\nexport const config = {\r\n matcher: [\r\n /*\r\n * Match all request paths except for the ones starting with:\r\n * - api (API routes)\r\n * - _next/static (static files)\r\n * - _next/image (image optimization files)\r\n * - favicon.ico, sitemap.xml, robots.txt (metadata files)\r\n */\r\n '/((?!api|_next/static|_next/image|favicon.ico|sitemap.xml|robots.txt).*)',\r\n ],\r\n};\r\n`;\r\n fs.writeFileSync(middlewarePath, middlewareTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' middleware.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ middleware.ts already exists, skipping.'));\r\n }\r\n\r\n console.log(pc.magenta('\\nInitialization complete! 🚀'));\r\n console.log(pc.dim('Next steps:'));\r\n console.log(pc.dim('1. Update your routes in onto.config.ts'));\r\n console.log(pc.dim('2. Run \"npm run build\" to generate manifests\\n'));\r\n}\r\n\r\nasync function main() {\r\n const args = process.argv.slice(2);\r\n const command = args[0];\r\n\r\n if (command === 'init') {\r\n await init();\r\n return;\r\n }\r\n\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n // ... rest of the existing main function logic ...\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n // --- Generate llms.txt manifest ---\r\n const config = await loadOntoConfig();\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const llmsTxtPath = path.join(cwd, 'public/llms.txt');\r\n \r\n // Ensure public dir exists\r\n const publicDir = path.join(cwd, 'public');\r\n if (!fs.existsSync(publicDir)) {\r\n fs.mkdirSync(publicDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(llmsTxtPath, llmsTxtContent, 'utf8');\r\n console.log(pc.green('✓ Generated') + pc.dim(' /llms.txt'));\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":";AACA,OAAS,QAAAA,MAAY,OACrB,OAAOC,MAAQ,KACf,OAAOC,MAAU,OACjB,OAAOC,MAAQ,aCJf,UAAYC,MAAa,UACzB,OAAOC,MAAqB,WAE5B,IAAMC,EAAkB,IAAID,EAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASE,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWd,EAAgB,SAASa,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CF9HA,OAAS,iBAAAK,MAAqB,MAE9B,eAAeC,GAA6C,CACxD,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAeC,EAAK,QAAQF,EAAK,gBAAgB,EACjDG,EAAeD,EAAK,QAAQF,EAAK,gBAAgB,EAEjDI,EAAY,MAAOC,GAAc,CACnC,GAAI,CACA,IAAMC,EAAS,MAAM,OAAOR,EAAcO,CAAC,EAAE,MAC7C,OAAOC,EAAO,SAAWA,CAC7B,MAAY,CACR,OAAO,IACX,CACJ,EAGIA,EAAS,MAAMF,EAAUH,CAAY,GAAK,MAAMG,EAAUD,CAAY,EAC1E,GAAIG,EAAQ,OAAOA,EAGnB,GAAI,CACA,IAAMC,EAAWC,EAAG,WAAWP,CAAY,EAAIA,EAAgBO,EAAG,WAAWL,CAAY,EAAIA,EAAe,KAC5G,GAAI,CAACI,EAAU,OAAO,KAEtB,IAAME,EAAUD,EAAG,aAAaD,EAAU,MAAM,EAG1CG,EAAYD,EAAQ,MAAM,2BAA2B,EACrDE,EAAeF,EAAQ,MAAM,8BAA8B,EAC3DG,EAAeH,EAAQ,MAAM,8BAA8B,EAEjE,GAAIC,EACA,MAAO,CACH,KAAMA,EAAU,CAAC,EACjB,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,OAAQ,CAAC,CACb,CAER,MAAY,CAEZ,CAEA,OAAO,IACX,CAGA,SAASC,GAAU,CACf,IAAMC,EAAUZ,EAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjDM,EAAG,WAAWM,CAAO,GACFN,EAAG,aAAaM,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQC,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClB,IAAMnB,EAAM,QAAQ,IAAI,EAClBoB,EAAalB,EAAK,KAAKF,EAAK,gBAAgB,EAC5CqB,EAAiBnB,EAAK,KAAKF,EAAK,eAAe,EAErD,QAAQ,IAAIsB,EAAG,KAAK;AAAA,+BAAkC,CAAC,EAGlDd,EAAG,WAAWY,CAAU,EAqBzB,QAAQ,IAAIE,EAAG,OAAO,iDAA4C,CAAC,GAHnEd,EAAG,cAAcY,EAjBM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBsB,MAAM,EACnD,QAAQ,IAAIE,EAAG,MAAM,gBAAW,EAAI,iBAAiB,GAMpDd,EAAG,WAAWa,CAAc,EAuB7B,QAAQ,IAAIC,EAAG,OAAO,gDAA2C,CAAC,GAHlEd,EAAG,cAAca,EAnBU;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmB0B,MAAM,EAC3D,QAAQ,IAAIC,EAAG,MAAM,gBAAW,EAAI,gBAAgB,GAKxD,QAAQ,IAAIA,EAAG,QAAQ;AAAA,mCAA+B,CAAC,EACvD,QAAQ,IAAIA,EAAG,IAAI,aAAa,CAAC,EACjC,QAAQ,IAAIA,EAAG,IAAI,yCAAyC,CAAC,EAC7D,QAAQ,IAAIA,EAAG,IAAI;AAAA,CAAgD,CAAC,CACxE,CAEA,eAAeC,GAAO,CAIlB,GAHa,QAAQ,KAAK,MAAM,CAAC,EACZ,CAAC,IAEN,OAAQ,CACpB,MAAMJ,EAAK,EACX,MACJ,CAEAN,EAAQ,EACR,QAAQ,IAAIS,EAAG,KAAK;AAAA,8CAAiD,CAAC,EAGtE,IAAMtB,EAAM,QAAQ,IAAI,EAClBwB,EAAgBtB,EAAK,KAAKF,EAAK,kBAAkB,EACjDyB,EAAgBvB,EAAK,KAAKF,EAAK,cAAc,EAEnD,GAAI,CAACQ,EAAG,WAAWgB,CAAa,EAAG,CAC/B,QAAQ,IAAIF,EAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAIF,EAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,MAAMC,EAAK,YAAa,CAAE,IAAKH,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAIJ,EAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGKd,EAAG,WAAWiB,CAAa,GAC5BjB,EAAG,UAAUiB,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIG,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQL,EAAO,CACtB,IAAMM,EAAY9B,EAAK,KAAKsB,EAAeO,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAahC,EAAK,KAAKuB,EAAeQ,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAc3B,EAAG,aAAawB,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAYpC,EAAK,QAAQgC,CAAU,EACpC1B,EAAG,WAAW8B,CAAS,GACxB9B,EAAG,UAAU8B,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C9B,EAAG,cAAc0B,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJnB,EAAG,MAAM,kBAAa,EACtBA,EAAG,IAAI,IAAImB,CAAS,GAAG,EACvBnB,EAAG,KAAK,IAAIiB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAMpB,EAAG,IAAI,4BAAuBS,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJpB,EAAG,KACCA,EAAG,QAAQ,aAAaQ,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAIR,EAAG,KAAK,+CAA+CsB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWnB,EAAM,IAAIK,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAAS7C,EAAK,KAAKuB,EAAeM,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAASjC,EAAG,aAAauC,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI1B,EAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM2B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI1B,EAAG,OAAO,sCAAiC2B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAIpB,EAAG,OAAO,qCAAgCoB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAGA,IAAMpC,EAAS,MAAMP,EAAe,EACpC,GAAIO,EAAQ,CACR,IAAM4C,EAAiBC,EAAgB7C,CAAM,EACvC8C,EAAclD,EAAK,KAAKF,EAAK,iBAAiB,EAG9CqD,EAAYnD,EAAK,KAAKF,EAAK,QAAQ,EACpCQ,EAAG,WAAW6C,CAAS,GACxB7C,EAAG,UAAU6C,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C7C,EAAG,cAAc4C,EAAaF,EAAgB,MAAM,EACpD,QAAQ,IAAI5B,EAAG,MAAM,kBAAa,EAAIA,EAAG,IAAI,YAAY,CAAC,CAC9D,CAEA,QAAQ,IAAIA,EAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAC,EAAK,EAAE,MAAMmB,GAAK,CACd,QAAQ,MAAMpB,EAAG,IAAI,uBAAuBoB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["glob","fs","path","pc","cheerio","TurndownService","turndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","pathToFileURL","loadOntoConfig","cwd","configPathTs","path","configPathJs","tryImport","p","config","filePath","fs","content","nameMatch","summaryMatch","baseUrlMatch","loadEnv","envPath","line","trimmedLine","key","valueParts","init","configPath","middlewarePath","pc","main","nextAppDirDir","ontoPublicDir","files","glob","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData","llmsTxtContent","generateLlmsTxt","llmsTxtPath","publicDir"]}
1
+ {"version":3,"sources":["../src/cli.ts","../src/extractor.ts","../src/config.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nimport { pathToFileURL } from 'url';\r\n\r\nasync function loadOntoConfig(): Promise<OntoConfig | null> {\r\n const cwd = process.cwd();\r\n const configPathTs = path.resolve(cwd, 'onto.config.ts');\r\n const configPathJs = path.resolve(cwd, 'onto.config.js');\r\n\r\n const tryImport = async (p: string) => {\r\n try {\r\n const config = await import(pathToFileURL(p).href);\r\n return config.default || config;\r\n } catch (e) {\r\n return null;\r\n }\r\n };\r\n\r\n // 1. Try ESM Import\r\n let config = await tryImport(configPathTs) || await tryImport(configPathJs);\r\n if (config) return config;\r\n\r\n // 2. Fallback: Manual Parsing (Robust for environments without TS loader)\r\n try {\r\n const filePath = fs.existsSync(configPathTs) ? configPathTs : (fs.existsSync(configPathJs) ? configPathJs : null);\r\n if (!filePath) return null;\r\n\r\n const content = fs.readFileSync(filePath, 'utf8');\r\n \r\n // Simple regex extraction for name and summary\r\n const nameMatch = content.match(/name\\s*:\\s*['\"`](.*)['\"`]/);\r\n const summaryMatch = content.match(/summary\\s*:\\s*['\"`](.*)['\"`]/);\r\n const baseUrlMatch = content.match(/baseUrl\\s*:\\s*['\"`](.*)['\"`]/);\r\n\r\n // Basic route extraction\r\n const routes: any[] = [];\r\n const routeRegex = /path\\s*:\\s*['\"`](.*?)['\"`]\\s*,\\s*description\\s*:\\s*['\"`](.*?)['\"`]/g;\r\n let match;\r\n while ((match = routeRegex.exec(content)) !== null) {\r\n routes.push({ path: match[1], description: match[2] });\r\n }\r\n\r\n if (nameMatch) {\r\n return {\r\n name: nameMatch[1],\r\n summary: summaryMatch ? summaryMatch[1] : '',\r\n baseUrl: baseUrlMatch ? baseUrlMatch[1] : '',\r\n routes: routes\r\n } as OntoConfig;\r\n }\r\n } catch (e) {\r\n // Fallback failed\r\n }\r\n\r\n return null;\r\n}\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function init() {\r\n const cwd = process.cwd();\r\n const configPath = path.join(cwd, 'onto.config.ts');\r\n const middlewarePath = path.join(cwd, 'middleware.ts');\r\n\r\n console.log(pc.cyan('\\n[Onto] Initializing project...'));\r\n\r\n // 1. Create onto.config.ts\r\n if (!fs.existsSync(configPath)) {\r\n const configTemplate = `import { OntoConfig } from '@ontosdk/next';\r\n\r\nconst config: OntoConfig = {\r\n name: 'My Project',\r\n summary: 'A short description of my project for AI agents.',\r\n baseUrl: 'https://example.com',\r\n routes: [\r\n { \r\n path: '/', \r\n description: 'The homepage of my application.',\r\n pageType: 'about'\r\n }\r\n ]\r\n};\r\n\r\nexport default config;\r\n`;\r\n fs.writeFileSync(configPath, configTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' onto.config.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ onto.config.ts already exists, skipping.'));\r\n }\r\n\r\n // 2. Create middleware.ts\r\n if (!fs.existsSync(middlewarePath)) {\r\n const middlewareTemplate = `import { NextRequest } from 'next/server';\r\nimport { ontoMiddleware } from '@ontosdk/next/middleware';\r\nimport ontoConfig from './onto.config';\r\n\r\nexport const middleware = (req: NextRequest) => ontoMiddleware(req, ontoConfig);\r\n\r\nexport const config = {\r\n matcher: [\r\n /*\r\n * Match all request paths except for the ones starting with:\r\n * - api (API routes)\r\n * - _next/static (static files)\r\n * - _next/image (image optimization files)\r\n * - favicon.ico, sitemap.xml, robots.txt (metadata files)\r\n */\r\n '/((?!api|_next/static|_next/image|favicon.ico|sitemap.xml|robots.txt).*)',\r\n ],\r\n};\r\n`;\r\n fs.writeFileSync(middlewarePath, middlewareTemplate, 'utf8');\r\n console.log(pc.green('✓ Created') + ' middleware.ts');\r\n } else {\r\n console.log(pc.yellow('ℹ middleware.ts already exists, skipping.'));\r\n }\r\n\r\n console.log(pc.magenta('\\nInitialization complete! 🚀'));\r\n console.log(pc.dim('Next steps:'));\r\n console.log(pc.dim('1. Update your routes in onto.config.ts'));\r\n console.log(pc.dim('2. Run \"npm run build\" to generate manifests\\n'));\r\n}\r\n\r\nasync function main() {\r\n const args = process.argv.slice(2);\r\n const command = args[0];\r\n\r\n if (command === 'init') {\r\n await init();\r\n return;\r\n }\r\n\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n // ... rest of the existing main function logic ...\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n // --- Generate llms.txt manifest ---\r\n const config = await loadOntoConfig();\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const llmsTxtPath = path.join(cwd, 'public/llms.txt');\r\n \r\n // Ensure public dir exists\r\n const publicDir = path.join(cwd, 'public');\r\n if (!fs.existsSync(publicDir)) {\r\n fs.mkdirSync(publicDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(llmsTxtPath, llmsTxtContent, 'utf8');\r\n console.log(pc.green('✓ Generated') + pc.dim(' /llms.txt'));\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":";AACA,OAAS,QAAAA,MAAY,OACrB,OAAOC,MAAQ,KACf,OAAOC,MAAU,OACjB,OAAOC,MAAQ,aCJf,UAAYC,MAAa,UACzB,OAAOC,MAAqB,WAE5B,IAAMC,EAAkB,IAAID,EAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASE,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWd,EAAgB,SAASa,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CF9HA,OAAS,iBAAAK,MAAqB,MAE9B,eAAeC,GAA6C,CACxD,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAeC,EAAK,QAAQF,EAAK,gBAAgB,EACjDG,EAAeD,EAAK,QAAQF,EAAK,gBAAgB,EAEjDI,EAAY,MAAOC,GAAc,CACnC,GAAI,CACA,IAAMC,EAAS,MAAM,OAAOR,EAAcO,CAAC,EAAE,MAC7C,OAAOC,EAAO,SAAWA,CAC7B,MAAY,CACR,OAAO,IACX,CACJ,EAGIA,EAAS,MAAMF,EAAUH,CAAY,GAAK,MAAMG,EAAUD,CAAY,EAC1E,GAAIG,EAAQ,OAAOA,EAGnB,GAAI,CACA,IAAMC,EAAWC,EAAG,WAAWP,CAAY,EAAIA,EAAgBO,EAAG,WAAWL,CAAY,EAAIA,EAAe,KAC5G,GAAI,CAACI,EAAU,OAAO,KAEtB,IAAME,EAAUD,EAAG,aAAaD,EAAU,MAAM,EAG1CG,EAAYD,EAAQ,MAAM,2BAA2B,EACrDE,EAAeF,EAAQ,MAAM,8BAA8B,EAC3DG,EAAeH,EAAQ,MAAM,8BAA8B,EAG3DI,EAAgB,CAAC,EACjBC,EAAa,sEACfC,EACJ,MAAQA,EAAQD,EAAW,KAAKL,CAAO,KAAO,MAC1CI,EAAO,KAAK,CAAE,KAAME,EAAM,CAAC,EAAG,YAAaA,EAAM,CAAC,CAAE,CAAC,EAGzD,GAAIL,EACA,MAAO,CACH,KAAMA,EAAU,CAAC,EACjB,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,QAASC,EAAeA,EAAa,CAAC,EAAI,GAC1C,OAAQC,CACZ,CAER,MAAY,CAEZ,CAEA,OAAO,IACX,CAGA,SAASG,GAAU,CACf,IAAMC,EAAUf,EAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjDM,EAAG,WAAWS,CAAO,GACFT,EAAG,aAAaS,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQC,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClB,IAAMtB,EAAM,QAAQ,IAAI,EAClBuB,EAAarB,EAAK,KAAKF,EAAK,gBAAgB,EAC5CwB,EAAiBtB,EAAK,KAAKF,EAAK,eAAe,EAErD,QAAQ,IAAIyB,EAAG,KAAK;AAAA,+BAAkC,CAAC,EAGlDjB,EAAG,WAAWe,CAAU,EAqBzB,QAAQ,IAAIE,EAAG,OAAO,iDAA4C,CAAC,GAHnEjB,EAAG,cAAce,EAjBM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBsB,MAAM,EACnD,QAAQ,IAAIE,EAAG,MAAM,gBAAW,EAAI,iBAAiB,GAMpDjB,EAAG,WAAWgB,CAAc,EAuB7B,QAAQ,IAAIC,EAAG,OAAO,gDAA2C,CAAC,GAHlEjB,EAAG,cAAcgB,EAnBU;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmB0B,MAAM,EAC3D,QAAQ,IAAIC,EAAG,MAAM,gBAAW,EAAI,gBAAgB,GAKxD,QAAQ,IAAIA,EAAG,QAAQ;AAAA,mCAA+B,CAAC,EACvD,QAAQ,IAAIA,EAAG,IAAI,aAAa,CAAC,EACjC,QAAQ,IAAIA,EAAG,IAAI,yCAAyC,CAAC,EAC7D,QAAQ,IAAIA,EAAG,IAAI;AAAA,CAAgD,CAAC,CACxE,CAEA,eAAeC,GAAO,CAIlB,GAHa,QAAQ,KAAK,MAAM,CAAC,EACZ,CAAC,IAEN,OAAQ,CACpB,MAAMJ,EAAK,EACX,MACJ,CAEAN,EAAQ,EACR,QAAQ,IAAIS,EAAG,KAAK;AAAA,8CAAiD,CAAC,EAGtE,IAAMzB,EAAM,QAAQ,IAAI,EAClB2B,EAAgBzB,EAAK,KAAKF,EAAK,kBAAkB,EACjD4B,EAAgB1B,EAAK,KAAKF,EAAK,cAAc,EAEnD,GAAI,CAACQ,EAAG,WAAWmB,CAAa,EAAG,CAC/B,QAAQ,IAAIF,EAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAIF,EAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,MAAMC,EAAK,YAAa,CAAE,IAAKH,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAIJ,EAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGKjB,EAAG,WAAWoB,CAAa,GAC5BpB,EAAG,UAAUoB,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIG,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQL,EAAO,CACtB,IAAMM,EAAYjC,EAAK,KAAKyB,EAAeO,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAanC,EAAK,KAAK0B,EAAeQ,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAc9B,EAAG,aAAa2B,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAYvC,EAAK,QAAQmC,CAAU,EACpC7B,EAAG,WAAWiC,CAAS,GACxBjC,EAAG,UAAUiC,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/CjC,EAAG,cAAc6B,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJnB,EAAG,MAAM,kBAAa,EACtBA,EAAG,IAAI,IAAImB,CAAS,GAAG,EACvBnB,EAAG,KAAK,IAAIiB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAMpB,EAAG,IAAI,4BAAuBS,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJpB,EAAG,KACCA,EAAG,QAAQ,aAAaQ,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAIR,EAAG,KAAK,+CAA+CsB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWnB,EAAM,IAAIK,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAAShD,EAAK,KAAK0B,EAAeM,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAASpC,EAAG,aAAa0C,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI1B,EAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM2B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI1B,EAAG,OAAO,sCAAiC2B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAIpB,EAAG,OAAO,qCAAgCoB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAGA,IAAMvC,EAAS,MAAMP,EAAe,EACpC,GAAIO,EAAQ,CACR,IAAM+C,EAAiBC,EAAgBhD,CAAM,EACvCiD,EAAcrD,EAAK,KAAKF,EAAK,iBAAiB,EAG9CwD,EAAYtD,EAAK,KAAKF,EAAK,QAAQ,EACpCQ,EAAG,WAAWgD,CAAS,GACxBhD,EAAG,UAAUgD,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/ChD,EAAG,cAAc+C,EAAaF,EAAgB,MAAM,EACpD,QAAQ,IAAI5B,EAAG,MAAM,kBAAa,EAAIA,EAAG,IAAI,YAAY,CAAC,CAC9D,CAEA,QAAQ,IAAIA,EAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAC,EAAK,EAAE,MAAMmB,GAAK,CACd,QAAQ,MAAMpB,EAAG,IAAI,uBAAuBoB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["glob","fs","path","pc","cheerio","TurndownService","turndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","pathToFileURL","loadOntoConfig","cwd","configPathTs","path","configPathJs","tryImport","p","config","filePath","fs","content","nameMatch","summaryMatch","baseUrlMatch","routes","routeRegex","match","loadEnv","envPath","line","trimmedLine","key","valueParts","init","configPath","middlewarePath","pc","main","nextAppDirDir","ontoPublicDir","files","glob","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData","llmsTxtContent","generateLlmsTxt","llmsTxtPath","publicDir"]}
@@ -1,8 +1,8 @@
1
- "use strict";var l=Object.defineProperty;var w=Object.getOwnPropertyDescriptor;var T=Object.getOwnPropertyNames;var k=Object.prototype.hasOwnProperty;var b=(t,e)=>{for(var o in e)l(t,o,{get:e[o],enumerable:!0})},R=(t,e,o,a)=>{if(e&&typeof e=="object"||typeof e=="function")for(let n of T(e))!k.call(t,n)&&n!==o&&l(t,n,{get:()=>e[n],enumerable:!(a=w(e,n))||a.enumerable});return t};var _=t=>R(l({},"__esModule",{value:!0}),t);var G={};b(G,{AI_BOT_USER_AGENTS:()=>x,matchBot:()=>c,ontoMiddleware:()=>U});module.exports=_(G);var p=require("next/server");var g=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],x=g.map(t=>t.name);function c(t){if(!t)return;let e=t.toLowerCase();return g.find(o=>e.includes(o.name.toLowerCase()))}function f(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let o of t.routes){let a=`${t.baseUrl}${o.path}`;e.push(`- [${o.path}](${a}): ${o.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let o of t.externalLinks)o.description?e.push(`- [${o.title}](${o.url}): ${o.description}`):e.push(`- [${o.title}](${o.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let o of t.sections)e.push(`## ${o.heading}`),e.push(""),e.push(o.content),e.push("");return e.join(`
1
+ "use strict";var d=Object.defineProperty;var T=Object.getOwnPropertyDescriptor;var b=Object.getOwnPropertyNames;var k=Object.prototype.hasOwnProperty;var R=(t,e)=>{for(var o in e)d(t,o,{get:e[o],enumerable:!0})},_=(t,e,o,a)=>{if(e&&typeof e=="object"||typeof e=="function")for(let n of b(e))!k.call(t,n)&&n!==o&&d(t,n,{get:()=>e[n],enumerable:!(a=T(e,n))||a.enumerable});return t};var P=t=>_(d({},"__esModule",{value:!0}),t);var G={};R(G,{AI_BOT_USER_AGENTS:()=>f,matchBot:()=>m,ontoMiddleware:()=>U});module.exports=P(G);var p=require("next/server");var y=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],f=y.map(t=>t.name);function m(t){if(!t)return;let e=t.toLowerCase();return y.find(o=>e.includes(o.name.toLowerCase()))}function O(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let o of t.routes){let a=`${t.baseUrl}${o.path}`;e.push(`- [${o.path}](${a}): ${o.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let o of t.externalLinks)o.description?e.push(`- [${o.title}](${o.url}): ${o.description}`):e.push(`- [${o.title}](${o.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let o of t.sections)e.push(`## ${o.heading}`),e.push(""),e.push(o.content),e.push("");return e.join(`
2
2
  `).trim()+`
3
- `}async function U(t,e){let o=t.headers.get("user-agent"),a=t.nextUrl.clone(),n=c(o),O=t.headers.get("accept")||"",$=!!n,A=O.includes("text/markdown");if($||A){if(a.pathname.startsWith("/_next"))return p.NextResponse.next();if(a.pathname==="/llms.txt")try{if(e){let r=f(e),i=new p.NextResponse(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}else{a.pathname="/llms.txt";let r=p.NextResponse.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(r){console.error("[Onto] Failed to generate llms.txt:",r),a.pathname="/llms.txt";let i=p.NextResponse.rewrite(a);return i.headers.set("Content-Type","text/plain; charset=utf-8"),n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}if(a.pathname.includes("."))return p.NextResponse.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let h={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(h["X-Onto-Bot"]=`${n.name} (${n.company})`);let m=process.env.ONTO_API_KEY,u=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(m){fetch(`${u}/api/track`,{method:"POST",headers:{"x-onto-key":m,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let r=await fetch(`${u}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":m},signal:AbortSignal.timeout(1500)});if(r.ok){let{injection:i}=await r.json();if(i){let B=`${a.origin}/.onto${s}.md`,y=await fetch(B);if(y.ok){let C=`${await y.text()}
3
+ `}async function U(t,e){let o=t.headers.get("user-agent"),a=t.nextUrl.clone(),n=m(o),$=t.headers.get("accept")||"",l=t.nextUrl.searchParams.has("onto"),C=!!n,A=$.includes("text/markdown")||l;if(C||A){if(a.pathname.startsWith("/_next"))return p.NextResponse.next();if(a.pathname==="/llms.txt")try{if(e){let r=O(e),i=new p.NextResponse(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}else{a.pathname="/llms.txt";let r=p.NextResponse.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(r){console.error("[Onto] Failed to generate llms.txt:",r),a.pathname="/llms.txt";let i=p.NextResponse.rewrite(a);return i.headers.set("Content-Type","text/plain; charset=utf-8"),n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}if(a.pathname.includes("."))return p.NextResponse.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let h={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(h["X-Onto-Bot"]=`${n.name} (${n.company})`),l&&(h["X-Onto-Debug"]="true");let u=process.env.ONTO_API_KEY,g=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(u){fetch(`${g}/api/track`,{method:"POST",headers:{"x-onto-key":u,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let r=await fetch(`${g}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":u},signal:AbortSignal.timeout(1500)});if(r.ok){let{injection:i}=await r.json();if(i){let B=`${a.origin}/.onto${s}.md`,x=await fetch(B);if(x.ok){let w=`${await x.text()}
4
4
 
5
5
  ---
6
6
 
7
- ${i}`;return new p.NextResponse(C,{headers:{...h,"X-Onto-Injected":"true"}})}}}}catch(r){console.error("[Onto] Injection failed",r)}}a.pathname=`/.onto${s}.md`;let d=p.NextResponse.rewrite(a);return n&&d.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),d}return p.NextResponse.next()}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
7
+ ${i}`;return new p.NextResponse(w,{headers:{...h,"X-Onto-Injected":"true"}})}}}}catch(r){console.error("[Onto] Injection failed",r)}}a.pathname=`/.onto${s}.md`;let c=p.NextResponse.rewrite(a);return c.headers.set("Content-Type","text/markdown; charset=utf-8"),c.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&c.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),l&&c.headers.set("X-Onto-Debug","true"),c}return p.NextResponse.next()}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
8
8
  //# sourceMappingURL=middleware.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown');\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking — includes structured bot info\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCoBO,SAASI,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAE1CO,EAAU,CAAC,CAACH,EACZI,EAAsBF,EAAO,SAAS,eAAe,EAG3D,GAAIC,GAAWC,EAAqB,CAGhC,GAAIL,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAK7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CAER,IAAMQ,EAAiBC,EAAgBT,CAAM,EACvCU,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIL,GACAO,EAAS,QAAQ,IAAI,aAAc,GAAGP,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtEO,CACX,KAAO,CAEHR,EAAI,SAAW,YACf,IAAMQ,EAAW,eAAa,QAAQR,CAAG,EACzC,OAAAQ,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGP,GACAO,EAAS,QAAQ,IAAI,aAAc,GAAGP,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEO,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DT,EAAI,SAAW,YACf,IAAMQ,EAAW,eAAa,QAAQR,CAAG,EACzC,OAAAQ,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DP,GACAO,EAAS,QAAQ,IAAI,aAAc,GAAGP,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEO,CACX,CAIJ,GAAIR,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIU,EAAcV,EAAI,UAClBU,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIV,IACAU,EAAW,YAAY,EAAI,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAIlE,IAAMW,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOZ,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMa,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBb,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAcY,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGhB,EAAI,MAAM,SAASU,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAI,eAAaG,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGAnB,EAAI,SAAW,SAASU,CAAW,MAGnC,IAAMF,EAAW,eAAa,QAAQR,CAAG,EACzC,OAAIC,GACAO,EAAS,QAAQ,IAAI,aAAc,GAAGP,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEO,CACX,CAEA,OAAO,eAAa,KAAK,CAC7B","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
1
+ {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n if (hasDebugParam) {\r\n botHeaders['X-Onto-Debug'] = 'true';\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Explicitly set headers on the rewrite response\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n if (hasDebugParam) {\r\n response.headers.set('X-Onto-Debug', 'true');\r\n }\r\n\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCoBO,SAASI,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAGhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAK7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CAER,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIN,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtEQ,CACX,KAAO,CAEHT,EAAI,SAAW,YACf,IAAMS,EAAW,eAAa,QAAQT,CAAG,EACzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEQ,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DV,EAAI,SAAW,YACf,IAAMS,EAAW,eAAa,QAAQT,CAAG,EACzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEQ,CACX,CAIJ,GAAIT,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIW,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIX,IACAW,EAAW,YAAY,EAAI,GAAGX,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAE9DG,IACAQ,EAAW,cAAc,EAAI,QAIjC,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOb,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMc,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBd,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAca,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGjB,EAAI,MAAM,SAASW,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAI,eAAaG,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGApB,EAAI,SAAW,SAASW,CAAW,MAGnC,IAAMF,EAAW,eAAa,QAAQT,CAAG,EAGzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EAErGR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEzEG,GACAK,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAGxCA,CACX,CAEA,OAAO,eAAa,KAAK,CAC7B","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
@@ -1,8 +1,8 @@
1
- import{NextResponse as p}from"next/server";var y=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],B=y.map(t=>t.name);function m(t){if(!t)return;let e=t.toLowerCase();return y.find(o=>e.includes(o.name.toLowerCase()))}function g(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let o of t.routes){let a=`${t.baseUrl}${o.path}`;e.push(`- [${o.path}](${a}): ${o.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let o of t.externalLinks)o.description?e.push(`- [${o.title}](${o.url}): ${o.description}`):e.push(`- [${o.title}](${o.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let o of t.sections)e.push(`## ${o.heading}`),e.push(""),e.push(o.content),e.push("");return e.join(`
1
+ import{NextResponse as p}from"next/server";var x=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],B=x.map(t=>t.name);function u(t){if(!t)return;let e=t.toLowerCase();return x.find(o=>e.includes(o.name.toLowerCase()))}function y(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let o of t.routes){let a=`${t.baseUrl}${o.path}`;e.push(`- [${o.path}](${a}): ${o.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let o of t.externalLinks)o.description?e.push(`- [${o.title}](${o.url}): ${o.description}`):e.push(`- [${o.title}](${o.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let o of t.sections)e.push(`## ${o.heading}`),e.push(""),e.push(o.content),e.push("");return e.join(`
2
2
  `).trim()+`
3
- `}async function G(t,e){let o=t.headers.get("user-agent"),a=t.nextUrl.clone(),n=m(o),x=t.headers.get("accept")||"",f=!!n,O=x.includes("text/markdown");if(f||O){if(a.pathname.startsWith("/_next"))return p.next();if(a.pathname==="/llms.txt")try{if(e){let r=g(e),i=new p(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}else{a.pathname="/llms.txt";let r=p.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(r){console.error("[Onto] Failed to generate llms.txt:",r),a.pathname="/llms.txt";let i=p.rewrite(a);return i.headers.set("Content-Type","text/plain; charset=utf-8"),n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}if(a.pathname.includes("."))return p.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let l={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(l["X-Onto-Bot"]=`${n.name} (${n.company})`);let c=process.env.ONTO_API_KEY,h=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(c){fetch(`${h}/api/track`,{method:"POST",headers:{"x-onto-key":c,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let r=await fetch(`${h}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":c},signal:AbortSignal.timeout(1500)});if(r.ok){let{injection:i}=await r.json();if(i){let $=`${a.origin}/.onto${s}.md`,d=await fetch($);if(d.ok){let A=`${await d.text()}
3
+ `}async function G(t,e){let o=t.headers.get("user-agent"),a=t.nextUrl.clone(),n=u(o),f=t.headers.get("accept")||"",m=t.nextUrl.searchParams.has("onto"),O=!!n,$=f.includes("text/markdown")||m;if(O||$){if(a.pathname.startsWith("/_next"))return p.next();if(a.pathname==="/llms.txt")try{if(e){let r=y(e),i=new p(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}else{a.pathname="/llms.txt";let r=p.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(r){console.error("[Onto] Failed to generate llms.txt:",r),a.pathname="/llms.txt";let i=p.rewrite(a);return i.headers.set("Content-Type","text/plain; charset=utf-8"),n&&i.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),i}if(a.pathname.includes("."))return p.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let l={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(l["X-Onto-Bot"]=`${n.name} (${n.company})`),m&&(l["X-Onto-Debug"]="true");let h=process.env.ONTO_API_KEY,d=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(h){fetch(`${d}/api/track`,{method:"POST",headers:{"x-onto-key":h,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let r=await fetch(`${d}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":h},signal:AbortSignal.timeout(1500)});if(r.ok){let{injection:i}=await r.json();if(i){let C=`${a.origin}/.onto${s}.md`,g=await fetch(C);if(g.ok){let A=`${await g.text()}
4
4
 
5
5
  ---
6
6
 
7
- ${i}`;return new p(A,{headers:{...l,"X-Onto-Injected":"true"}})}}}}catch(r){console.error("[Onto] Injection failed",r)}}a.pathname=`/.onto${s}.md`;let u=p.rewrite(a);return n&&u.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),u}return p.next()}export{B as AI_BOT_USER_AGENTS,m as matchBot,G as ontoMiddleware};
7
+ ${i}`;return new p(A,{headers:{...l,"X-Onto-Injected":"true"}})}}}}catch(r){console.error("[Onto] Injection failed",r)}}a.pathname=`/.onto${s}.md`;let c=p.rewrite(a);return c.headers.set("Content-Type","text/markdown; charset=utf-8"),c.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&c.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),m&&c.headers.set("X-Onto-Debug","true"),c}return p.next()}export{B as AI_BOT_USER_AGENTS,u as matchBot,G as ontoMiddleware};
8
8
  //# sourceMappingURL=middleware.mjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown');\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking — includes structured bot info\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCoBO,SAASI,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAE1CO,EAAU,CAAC,CAACH,EACZI,EAAsBF,EAAO,SAAS,eAAe,EAG3D,GAAIC,GAAWC,EAAqB,CAGhC,GAAIL,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOM,EAAa,KAAK,EAK7B,GAAIN,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CAER,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIN,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtEQ,CACX,KAAO,CAEHT,EAAI,SAAW,YACf,IAAMS,EAAWH,EAAa,QAAQN,CAAG,EACzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEQ,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DV,EAAI,SAAW,YACf,IAAMS,EAAWH,EAAa,QAAQN,CAAG,EACzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DR,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEQ,CACX,CAIJ,GAAIT,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOM,EAAa,KAAK,EAI7B,IAAIK,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIX,IACAW,EAAW,YAAY,EAAI,GAAGX,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAIlE,IAAMY,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOb,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMc,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBd,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAca,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGjB,EAAI,MAAM,SAASW,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAIV,EAAaa,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGApB,EAAI,SAAW,SAASW,CAAW,MAGnC,IAAMF,EAAWH,EAAa,QAAQN,CAAG,EACzC,OAAIC,GACAQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEQ,CACX,CAEA,OAAOH,EAAa,KAAK,CAC7B","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
1
+ {"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n if (hasDebugParam) {\r\n botHeaders['X-Onto-Debug'] = 'true';\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Explicitly set headers on the rewrite response\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n \r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n if (hasDebugParam) {\r\n response.headers.set('X-Onto-Debug', 'true');\r\n }\r\n\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCoBO,SAASI,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAGhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOO,EAAa,KAAK,EAK7B,GAAIP,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CAER,IAAMU,EAAiBC,EAAgBX,CAAM,EACvCY,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIP,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtES,CACX,KAAO,CAEHV,EAAI,SAAW,YACf,IAAMU,EAAWH,EAAa,QAAQP,CAAG,EACzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DX,EAAI,SAAW,YACf,IAAMU,EAAWH,EAAa,QAAQP,CAAG,EACzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOO,EAAa,KAAK,EAI7B,IAAIK,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIZ,IACAY,EAAW,YAAY,EAAI,GAAGZ,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAE9DG,IACAS,EAAW,cAAc,EAAI,QAIjC,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOd,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMe,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBf,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAcc,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGlB,EAAI,MAAM,SAASY,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAIV,EAAaa,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGArB,EAAI,SAAW,SAASY,CAAW,MAGnC,IAAMF,EAAWH,EAAa,QAAQP,CAAG,EAGzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EAErGT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEzEG,GACAM,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAGxCA,CACX,CAEA,OAAOH,EAAa,KAAK,CAC7B","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ontosdk/next",
3
- "version": "1.4.1",
3
+ "version": "1.4.3",
4
4
  "description": "Extracts semantic Markdown from React/Next.js pages for AI Agents",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
package/src/cli.ts CHANGED
@@ -38,12 +38,20 @@ async function loadOntoConfig(): Promise<OntoConfig | null> {
38
38
  const summaryMatch = content.match(/summary\s*:\s*['"`](.*)['"`]/);
39
39
  const baseUrlMatch = content.match(/baseUrl\s*:\s*['"`](.*)['"`]/);
40
40
 
41
+ // Basic route extraction
42
+ const routes: any[] = [];
43
+ const routeRegex = /path\s*:\s*['"`](.*?)['"`]\s*,\s*description\s*:\s*['"`](.*?)['"`]/g;
44
+ let match;
45
+ while ((match = routeRegex.exec(content)) !== null) {
46
+ routes.push({ path: match[1], description: match[2] });
47
+ }
48
+
41
49
  if (nameMatch) {
42
50
  return {
43
51
  name: nameMatch[1],
44
52
  summary: summaryMatch ? summaryMatch[1] : '',
45
53
  baseUrl: baseUrlMatch ? baseUrlMatch[1] : '',
46
- routes: [] // We skip routes in manual fallback for simplicity
54
+ routes: routes
47
55
  } as OntoConfig;
48
56
  }
49
57
  } catch (e) {
package/src/middleware.ts CHANGED
@@ -8,9 +8,10 @@ export async function ontoMiddleware(request: any, config?: OntoConfig) {
8
8
  const matched = matchBot(userAgent);
9
9
 
10
10
  const accept = request.headers.get('accept') || '';
11
+ const hasDebugParam = request.nextUrl.searchParams.has('onto');
11
12
 
12
13
  const isAiBot = !!matched;
13
- const isMarkdownRequested = accept.includes('text/markdown');
14
+ const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;
14
15
 
15
16
  // If traffic is identified as an AI Bot or markdown is requested
16
17
  if (isAiBot || isMarkdownRequested) {
@@ -87,13 +88,16 @@ export async function ontoMiddleware(request: any, config?: OntoConfig) {
87
88
  if (matched) {
88
89
  botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;
89
90
  }
91
+ if (hasDebugParam) {
92
+ botHeaders['X-Onto-Debug'] = 'true';
93
+ }
90
94
 
91
95
  // --- Onto Control Plane Integration (Premium) ---
92
96
  const ONTO_API_KEY = process.env.ONTO_API_KEY;
93
97
  const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';
94
98
 
95
99
  if (ONTO_API_KEY) {
96
- // 1. Fire-and-forget tracking — includes structured bot info
100
+ // 1. Fire-and-forget tracking
97
101
  fetch(`${DASHBOARD_URL}/api/track`, {
98
102
  method: 'POST',
99
103
  headers: {
@@ -145,9 +149,18 @@ export async function ontoMiddleware(request: any, config?: OntoConfig) {
145
149
 
146
150
  // Rewrite implicitly serves the target URL transparently to the client.
147
151
  const response = NextResponse.rewrite(url);
152
+
153
+ // Explicitly set headers on the rewrite response
154
+ response.headers.set('Content-Type', 'text/markdown; charset=utf-8');
155
+ response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');
156
+
148
157
  if (matched) {
149
158
  response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
150
159
  }
160
+ if (hasDebugParam) {
161
+ response.headers.set('X-Onto-Debug', 'true');
162
+ }
163
+
151
164
  return response;
152
165
  }
153
166