@ontosdk/next 1.5.1 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/OntoProvider.js +1 -1
- package/dist/OntoProvider.js.map +1 -1
- package/dist/OntoProvider.mjs +1 -1
- package/dist/OntoProvider.mjs.map +1 -1
- package/dist/index.js +5 -5
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +5 -5
- package/dist/index.mjs.map +1 -1
- package/dist/middleware.js +2 -2
- package/dist/middleware.js.map +1 -1
- package/dist/middleware.mjs +2 -2
- package/dist/middleware.mjs.map +1 -1
- package/dist/schemas.d.mts +3 -4
- package/dist/schemas.d.ts +3 -4
- package/dist/schemas.js +1 -1
- package/dist/schemas.js.map +1 -1
- package/dist/schemas.mjs +1 -1
- package/dist/schemas.mjs.map +1 -1
- package/package.json +1 -1
- package/src/bots.ts +7 -0
- package/src/schemas.ts +10 -17
package/dist/OntoProvider.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";"use client";var s=Object.defineProperty;var y=Object.getOwnPropertyDescriptor;var S=Object.getOwnPropertyNames;var
|
|
1
|
+
"use strict";"use client";var s=Object.defineProperty;var y=Object.getOwnPropertyDescriptor;var S=Object.getOwnPropertyNames;var O=Object.prototype.hasOwnProperty;var x=(t,n)=>{for(var e in n)s(t,e,{get:n[e],enumerable:!0})},z=(t,n,e,o)=>{if(n&&typeof n=="object"||typeof n=="function")for(let a of S(n))!O.call(t,a)&&a!==e&&s(t,a,{get:()=>n[a],enumerable:!(o=y(n,a))||o.enumerable});return t};var w=t=>z(s({},"__esModule",{value:!0}),t);var b={};x(b,{OntoProvider:()=>M});module.exports=w(b);var h=require("next/navigation"),c=require("react");function A(t,n){return{"@context":"https://schema.org","@type":"HowTo",name:"AIO Score Calculation Methodology",description:"AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.",step:[{"@type":"HowToStep",name:"React Tax (Token Efficiency)",text:'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high "React Tax" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',position:1},{"@type":"HowToStep",name:"Semantic Richness",text:"Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.",position:2},{"@type":"HowToStep",name:"Content Negotiation",text:"Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. 
Weight: 25%.",position:3}]}}function T(t,n){if(!t.organization)return null;let e={"@context":"https://schema.org","@type":"Organization",name:t.organization.name};return t.organization.url&&(e.url=t.organization.url),t.organization.description&&(e.description=t.organization.description),t.organization.logo&&(e.logo=t.organization.logo),t.organization.foundingDate&&(e.foundingDate=t.organization.foundingDate),e}function P(t,n){let e=T(t,n),o={"@context":"https://schema.org","@type":"AboutPage",name:`About ${t.name}`,url:n};return t.summary&&(o.description=t.summary),e&&(o.mainEntity=e),o}function u(t,n,e){switch(t){case"scoring":return A(n,e);case"about":return P(n,e);default:return null}}function l(t){return t?JSON.stringify(t,null,2):null}var r=require("react/jsx-runtime");function M({baseUrl:t,children:n,config:e}){let o=(0,h.usePathname)(),a=t.endsWith("/")?t.slice(0,-1):t,d=`${a}${o}?format=md`,g=`${a}${o}`,i=(0,c.useMemo)(()=>e?.routes&&e.routes.find(f=>f.path===o)?.pageType||"default",[e,o]),p=(0,c.useMemo)(()=>{if(!e||i==="default")return null;let m=u(i,e,g);return l(m)},[e,i,g]);return(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)("link",{rel:"alternate",type:"text/markdown",href:d,title:"AI-optimized Markdown version"}),p&&(0,r.jsx)("script",{type:"application/ld+json",dangerouslySetInnerHTML:{__html:p}}),n]})}0&&(module.exports={OntoProvider});
|
|
2
2
|
//# sourceMappingURL=OntoProvider.js.map
|
package/dist/OntoProvider.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/OntoProvider.tsx","../src/schemas.ts"],"sourcesContent":["'use client';\r\n\r\nimport { usePathname } from 'next/navigation';\r\nimport { ReactNode, useMemo } from 'react';\r\nimport type { OntoConfig, PageType } from './config';\r\nimport { generateSchemaForPageType, serializeSchema } from './schemas';\r\n\r\nexport interface OntoProviderProps {\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n * Used to construct the full href for the AI discovery link tag.\r\n */\r\n baseUrl: string;\r\n /**\r\n * Child components to render\r\n */\r\n children: ReactNode;\r\n /**\r\n * Optional: Onto configuration for automatic JSON-LD schema injection\r\n * If provided, the provider will automatically inject JSON-LD schemas\r\n * based on the page type configuration\r\n */\r\n config?: OntoConfig;\r\n}\r\n\r\n/**\r\n * OntoProvider — Automatic AI Discovery Provider\r\n *\r\n * Wraps your application and automatically injects:\r\n * 1. `<link rel=\"alternate\">` tags for AI discovery\r\n * 2. JSON-LD structured data schemas based on page type\r\n *\r\n * With config, automatically generates JSON-LD schemas:\r\n * - 'scoring' pages get Methodology schema with AIO weights (40/35/25)\r\n * - 'about' pages get Organization/AboutPage schema\r\n *\r\n * Usage in a Next.js App Router layout:\r\n * ```tsx\r\n * import { OntoProvider } from '@ontosdk/next/provider';\r\n * import config from '../onto.config';\r\n *\r\n * export default function RootLayout({ children }) {\r\n * return (\r\n * <OntoProvider baseUrl=\"https://example.com\" config={config}>\r\n * <html>\r\n * <head />\r\n * <body>{children}</body>\r\n * </html>\r\n * </OntoProvider>\r\n * );\r\n * }\r\n * ```\r\n */\r\nexport function OntoProvider({ baseUrl, children, config }: OntoProviderProps) {\r\n const pathname = usePathname();\r\n\r\n // Construct the full URL with the current path and ?format=md query string\r\n const cleanBaseUrl = baseUrl.endsWith('/') ? 
baseUrl.slice(0, -1) : baseUrl;\r\n const markdownHref = `${cleanBaseUrl}${pathname}?format=md`;\r\n const fullPageUrl = `${cleanBaseUrl}${pathname}`;\r\n\r\n // Determine page type from config routes\r\n const pageType: PageType = useMemo(() => {\r\n if (!config?.routes) return 'default';\r\n\r\n const matchingRoute = config.routes.find(route => route.path === pathname);\r\n return matchingRoute?.pageType || 'default';\r\n }, [config, pathname]);\r\n\r\n // Generate JSON-LD schema based on page type\r\n const jsonLdSchema = useMemo(() => {\r\n if (!config || pageType === 'default') return null;\r\n\r\n const schema = generateSchemaForPageType(pageType, config, fullPageUrl);\r\n return serializeSchema(schema);\r\n }, [config, pageType, fullPageUrl]);\r\n\r\n return (\r\n <>\r\n <link\r\n rel=\"alternate\"\r\n type=\"text/markdown\"\r\n href={markdownHref}\r\n title=\"AI-optimized Markdown version\"\r\n />\r\n {jsonLdSchema && (\r\n <script\r\n type=\"application/ld+json\"\r\n dangerouslySetInnerHTML={{ __html: jsonLdSchema }}\r\n />\r\n )}\r\n {children}\r\n </>\r\n );\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - Content Negotiation: 40% (30 points)\r\n * - React Tax / Token Efficiency: 35% (30 points)\r\n * - Structured Data: 25% (25 points)\r\n * - Semantic HTML: Bonus (15 points)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function 
generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on four key metrics.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Check if the site responds to Accept: text/markdown header. Weight: 40%. Penalty: -30 points if missing. This ensures AI bots receive optimized content instead of heavy HTML.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Token Efficiency (React Tax)',\r\n text: 'Measure the ratio of visible text to total HTML size. Weight: 35%. Penalty: -30 points if HTML > 100KB but text < 1KB. Detects JavaScript-heavy sites that are difficult for AI to parse.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Structured Data',\r\n text: 'Verify presence of JSON-LD structured data (Schema.org). Weight: 25%. Penalty: -25 points if missing. Enables AI to confidently extract pricing, products, and entities.',\r\n position: 3\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic HTML',\r\n text: 'Check for semantic tags like <main> and <article>. Bonus: +15 points if present. 
Helps AI agents separate navigation from core content.',\r\n position: 4\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if 
(orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"sbAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,kBAAAE,IAAA,eAAAC,EAAAH,GAEA,IAAAI,EAA4B,2BAC5BC,EAAmC,iBC6B5B,SAASC,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,2JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,sBACN,KAAM,iLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,+BACN,KAAM,4LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,kBACN,KAAM,2KACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,gBACN,KAAM,0IACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX,CD3GI,IAAAM,EA
AA,6BAzBG,SAASC,EAAa,CAAE,QAAAC,EAAS,SAAAC,EAAU,OAAAC,CAAO,EAAsB,CAC7E,IAAMC,KAAW,eAAY,EAGvBC,EAAeJ,EAAQ,SAAS,GAAG,EAAIA,EAAQ,MAAM,EAAG,EAAE,EAAIA,EAC9DK,EAAe,GAAGD,CAAY,GAAGD,CAAQ,aACzCG,EAAc,GAAGF,CAAY,GAAGD,CAAQ,GAGxCI,KAAqB,WAAQ,IAC5BL,GAAQ,QAESA,EAAO,OAAO,KAAKM,GAASA,EAAM,OAASL,CAAQ,GACnD,UAAY,UACjC,CAACD,EAAQC,CAAQ,CAAC,EAGfM,KAAe,WAAQ,IAAM,CACjC,GAAI,CAACP,GAAUK,IAAa,UAAW,OAAO,KAE9C,IAAMG,EAASC,EAA0BJ,EAAUL,EAAQI,CAAW,EACtE,OAAOM,EAAgBF,CAAM,CAC/B,EAAG,CAACR,EAAQK,EAAUD,CAAW,CAAC,EAElC,SACE,oBACE,oBAAC,QACC,IAAI,YACJ,KAAK,gBACL,KAAMD,EACN,MAAM,gCACR,EACCI,MACC,OAAC,UACC,KAAK,sBACL,wBAAyB,CAAE,OAAQA,CAAa,EAClD,EAEDR,GACH,CAEJ","names":["OntoProvider_exports","__export","OntoProvider","__toCommonJS","import_navigation","import_react","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema","import_jsx_runtime","OntoProvider","baseUrl","children","config","pathname","cleanBaseUrl","markdownHref","fullPageUrl","pageType","route","jsonLdSchema","schema","generateSchemaForPageType","serializeSchema"]}
|
|
1
|
+
{"version":3,"sources":["../src/OntoProvider.tsx","../src/schemas.ts"],"sourcesContent":["'use client';\r\n\r\nimport { usePathname } from 'next/navigation';\r\nimport { ReactNode, useMemo } from 'react';\r\nimport type { OntoConfig, PageType } from './config';\r\nimport { generateSchemaForPageType, serializeSchema } from './schemas';\r\n\r\nexport interface OntoProviderProps {\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n * Used to construct the full href for the AI discovery link tag.\r\n */\r\n baseUrl: string;\r\n /**\r\n * Child components to render\r\n */\r\n children: ReactNode;\r\n /**\r\n * Optional: Onto configuration for automatic JSON-LD schema injection\r\n * If provided, the provider will automatically inject JSON-LD schemas\r\n * based on the page type configuration\r\n */\r\n config?: OntoConfig;\r\n}\r\n\r\n/**\r\n * OntoProvider — Automatic AI Discovery Provider\r\n *\r\n * Wraps your application and automatically injects:\r\n * 1. `<link rel=\"alternate\">` tags for AI discovery\r\n * 2. JSON-LD structured data schemas based on page type\r\n *\r\n * With config, automatically generates JSON-LD schemas:\r\n * - 'scoring' pages get Methodology schema with AIO weights (40/35/25)\r\n * - 'about' pages get Organization/AboutPage schema\r\n *\r\n * Usage in a Next.js App Router layout:\r\n * ```tsx\r\n * import { OntoProvider } from '@ontosdk/next/provider';\r\n * import config from '../onto.config';\r\n *\r\n * export default function RootLayout({ children }) {\r\n * return (\r\n * <OntoProvider baseUrl=\"https://example.com\" config={config}>\r\n * <html>\r\n * <head />\r\n * <body>{children}</body>\r\n * </html>\r\n * </OntoProvider>\r\n * );\r\n * }\r\n * ```\r\n */\r\nexport function OntoProvider({ baseUrl, children, config }: OntoProviderProps) {\r\n const pathname = usePathname();\r\n\r\n // Construct the full URL with the current path and ?format=md query string\r\n const cleanBaseUrl = baseUrl.endsWith('/') ? 
baseUrl.slice(0, -1) : baseUrl;\r\n const markdownHref = `${cleanBaseUrl}${pathname}?format=md`;\r\n const fullPageUrl = `${cleanBaseUrl}${pathname}`;\r\n\r\n // Determine page type from config routes\r\n const pageType: PageType = useMemo(() => {\r\n if (!config?.routes) return 'default';\r\n\r\n const matchingRoute = config.routes.find(route => route.path === pathname);\r\n return matchingRoute?.pageType || 'default';\r\n }, [config, pathname]);\r\n\r\n // Generate JSON-LD schema based on page type\r\n const jsonLdSchema = useMemo(() => {\r\n if (!config || pageType === 'default') return null;\r\n\r\n const schema = generateSchemaForPageType(pageType, config, fullPageUrl);\r\n return serializeSchema(schema);\r\n }, [config, pageType, fullPageUrl]);\r\n\r\n return (\r\n <>\r\n <link\r\n rel=\"alternate\"\r\n type=\"text/markdown\"\r\n href={markdownHref}\r\n title=\"AI-optimized Markdown version\"\r\n />\r\n {jsonLdSchema && (\r\n <script\r\n type=\"application/ld+json\"\r\n dangerouslySetInnerHTML={{ __html: jsonLdSchema }}\r\n />\r\n )}\r\n {children}\r\n </>\r\n );\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - React Tax (Efficiency): 40% (Step 1)\r\n * - Semantic Richness: 35% (Step 2)\r\n * - Content Negotiation: 25% (Step 3)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: 
OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'React Tax (Token Efficiency)',\r\n text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high \"React Tax\" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic Richness',\r\n text: 'Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. 
Weight: 25%.',\r\n position: 3\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if (orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n 
}\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"sbAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,kBAAAE,IAAA,eAAAC,EAAAH,GAEA,IAAAI,EAA4B,2BAC5BC,EAAmC,iBC4B5B,SAASC,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,6JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,+BACN,KAAM,8LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,oBACN,KAAM,qLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,sBACN,KAAM,yJACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX,CDpGI,IAAAM,EAAA,6BAzBG,SAASC,EAAa,CAAE,QAAAC,EAAS,SAAAC,EAAU,OAAAC,CAAO,EAAsB,CAC7E,IAAMC,KAAW,eAAY,EAGvBC,EAAeJ,EAAQ,
SAAS,GAAG,EAAIA,EAAQ,MAAM,EAAG,EAAE,EAAIA,EAC9DK,EAAe,GAAGD,CAAY,GAAGD,CAAQ,aACzCG,EAAc,GAAGF,CAAY,GAAGD,CAAQ,GAGxCI,KAAqB,WAAQ,IAC5BL,GAAQ,QAESA,EAAO,OAAO,KAAKM,GAASA,EAAM,OAASL,CAAQ,GACnD,UAAY,UACjC,CAACD,EAAQC,CAAQ,CAAC,EAGfM,KAAe,WAAQ,IAAM,CACjC,GAAI,CAACP,GAAUK,IAAa,UAAW,OAAO,KAE9C,IAAMG,EAASC,EAA0BJ,EAAUL,EAAQI,CAAW,EACtE,OAAOM,EAAgBF,CAAM,CAC/B,EAAG,CAACR,EAAQK,EAAUD,CAAW,CAAC,EAElC,SACE,oBACE,oBAAC,QACC,IAAI,YACJ,KAAK,gBACL,KAAMD,EACN,MAAM,gCACR,EACCI,MACC,OAAC,UACC,KAAK,sBACL,wBAAyB,CAAE,OAAQA,CAAa,EAClD,EAEDR,GACH,CAEJ","names":["OntoProvider_exports","__export","OntoProvider","__toCommonJS","import_navigation","import_react","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema","import_jsx_runtime","OntoProvider","baseUrl","children","config","pathname","cleanBaseUrl","markdownHref","fullPageUrl","pageType","route","jsonLdSchema","schema","generateSchemaForPageType","serializeSchema"]}
|
package/dist/OntoProvider.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use client";import{usePathname as S}from"next/navigation";import{useMemo as m}from"react";function d(t,n){return{"@context":"https://schema.org","@type":"HowTo",name:"AIO Score Calculation Methodology",description:"AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on
|
|
1
|
+
"use client";import{usePathname as S}from"next/navigation";import{useMemo as m}from"react";function d(t,n){return{"@context":"https://schema.org","@type":"HowTo",name:"AIO Score Calculation Methodology",description:"AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.",step:[{"@type":"HowToStep",name:"React Tax (Token Efficiency)",text:'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high "React Tax" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',position:1},{"@type":"HowToStep",name:"Semantic Richness",text:"Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.",position:2},{"@type":"HowToStep",name:"Content Negotiation",text:"Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. 
Weight: 25%.",position:3}]}}function f(t,n){if(!t.organization)return null;let e={"@context":"https://schema.org","@type":"Organization",name:t.organization.name};return t.organization.url&&(e.url=t.organization.url),t.organization.description&&(e.description=t.organization.description),t.organization.logo&&(e.logo=t.organization.logo),t.organization.foundingDate&&(e.foundingDate=t.organization.foundingDate),e}function y(t,n){let e=f(t,n),o={"@context":"https://schema.org","@type":"AboutPage",name:`About ${t.name}`,url:n};return t.summary&&(o.description=t.summary),e&&(o.mainEntity=e),o}function g(t,n,e){switch(t){case"scoring":return d(n,e);case"about":return y(n,e);default:return null}}function p(t){return t?JSON.stringify(t,null,2):null}import{Fragment as O,jsx as u,jsxs as x}from"react/jsx-runtime";function M({baseUrl:t,children:n,config:e}){let o=S(),r=t.endsWith("/")?t.slice(0,-1):t,l=`${r}${o}?format=md`,i=`${r}${o}`,a=m(()=>e?.routes&&e.routes.find(h=>h.path===o)?.pageType||"default",[e,o]),s=m(()=>{if(!e||a==="default")return null;let c=g(a,e,i);return p(c)},[e,a,i]);return x(O,{children:[u("link",{rel:"alternate",type:"text/markdown",href:l,title:"AI-optimized Markdown version"}),s&&u("script",{type:"application/ld+json",dangerouslySetInnerHTML:{__html:s}}),n]})}export{M as OntoProvider};
|
|
2
2
|
//# sourceMappingURL=OntoProvider.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/OntoProvider.tsx","../src/schemas.ts"],"sourcesContent":["'use client';\r\n\r\nimport { usePathname } from 'next/navigation';\r\nimport { ReactNode, useMemo } from 'react';\r\nimport type { OntoConfig, PageType } from './config';\r\nimport { generateSchemaForPageType, serializeSchema } from './schemas';\r\n\r\nexport interface OntoProviderProps {\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n * Used to construct the full href for the AI discovery link tag.\r\n */\r\n baseUrl: string;\r\n /**\r\n * Child components to render\r\n */\r\n children: ReactNode;\r\n /**\r\n * Optional: Onto configuration for automatic JSON-LD schema injection\r\n * If provided, the provider will automatically inject JSON-LD schemas\r\n * based on the page type configuration\r\n */\r\n config?: OntoConfig;\r\n}\r\n\r\n/**\r\n * OntoProvider — Automatic AI Discovery Provider\r\n *\r\n * Wraps your application and automatically injects:\r\n * 1. `<link rel=\"alternate\">` tags for AI discovery\r\n * 2. JSON-LD structured data schemas based on page type\r\n *\r\n * With config, automatically generates JSON-LD schemas:\r\n * - 'scoring' pages get Methodology schema with AIO weights (40/35/25)\r\n * - 'about' pages get Organization/AboutPage schema\r\n *\r\n * Usage in a Next.js App Router layout:\r\n * ```tsx\r\n * import { OntoProvider } from '@ontosdk/next/provider';\r\n * import config from '../onto.config';\r\n *\r\n * export default function RootLayout({ children }) {\r\n * return (\r\n * <OntoProvider baseUrl=\"https://example.com\" config={config}>\r\n * <html>\r\n * <head />\r\n * <body>{children}</body>\r\n * </html>\r\n * </OntoProvider>\r\n * );\r\n * }\r\n * ```\r\n */\r\nexport function OntoProvider({ baseUrl, children, config }: OntoProviderProps) {\r\n const pathname = usePathname();\r\n\r\n // Construct the full URL with the current path and ?format=md query string\r\n const cleanBaseUrl = baseUrl.endsWith('/') ? 
baseUrl.slice(0, -1) : baseUrl;\r\n const markdownHref = `${cleanBaseUrl}${pathname}?format=md`;\r\n const fullPageUrl = `${cleanBaseUrl}${pathname}`;\r\n\r\n // Determine page type from config routes\r\n const pageType: PageType = useMemo(() => {\r\n if (!config?.routes) return 'default';\r\n\r\n const matchingRoute = config.routes.find(route => route.path === pathname);\r\n return matchingRoute?.pageType || 'default';\r\n }, [config, pathname]);\r\n\r\n // Generate JSON-LD schema based on page type\r\n const jsonLdSchema = useMemo(() => {\r\n if (!config || pageType === 'default') return null;\r\n\r\n const schema = generateSchemaForPageType(pageType, config, fullPageUrl);\r\n return serializeSchema(schema);\r\n }, [config, pageType, fullPageUrl]);\r\n\r\n return (\r\n <>\r\n <link\r\n rel=\"alternate\"\r\n type=\"text/markdown\"\r\n href={markdownHref}\r\n title=\"AI-optimized Markdown version\"\r\n />\r\n {jsonLdSchema && (\r\n <script\r\n type=\"application/ld+json\"\r\n dangerouslySetInnerHTML={{ __html: jsonLdSchema }}\r\n />\r\n )}\r\n {children}\r\n </>\r\n );\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - Content Negotiation: 40% (30 points)\r\n * - React Tax / Token Efficiency: 35% (30 points)\r\n * - Structured Data: 25% (25 points)\r\n * - Semantic HTML: Bonus (15 points)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function 
generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on four key metrics.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Check if the site responds to Accept: text/markdown header. Weight: 40%. Penalty: -30 points if missing. This ensures AI bots receive optimized content instead of heavy HTML.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Token Efficiency (React Tax)',\r\n text: 'Measure the ratio of visible text to total HTML size. Weight: 35%. Penalty: -30 points if HTML > 100KB but text < 1KB. Detects JavaScript-heavy sites that are difficult for AI to parse.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Structured Data',\r\n text: 'Verify presence of JSON-LD structured data (Schema.org). Weight: 25%. Penalty: -25 points if missing. Enables AI to confidently extract pricing, products, and entities.',\r\n position: 3\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic HTML',\r\n text: 'Check for semantic tags like <main> and <article>. Bonus: +15 points if present. 
Helps AI agents separate navigation from core content.',\r\n position: 4\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if 
(orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"aAEA,OAAS,eAAAA,MAAmB,kBAC5B,OAAoB,WAAAC,MAAe,QC6B5B,SAASC,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,2JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,sBACN,KAAM,iLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,+BACN,KAAM,4LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,kBACN,KAAM,2KACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,gBACN,KAAM,0IACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX,CD3GI,mBAAAM,EACE,OAAAC,EADF,QAAAC,MAAA,oBAzBG,SAASC,EAAa,
CAAE,QAAAC,EAAS,SAAAC,EAAU,OAAAC,CAAO,EAAsB,CAC7E,IAAMC,EAAWC,EAAY,EAGvBC,EAAeL,EAAQ,SAAS,GAAG,EAAIA,EAAQ,MAAM,EAAG,EAAE,EAAIA,EAC9DM,EAAe,GAAGD,CAAY,GAAGF,CAAQ,aACzCI,EAAc,GAAGF,CAAY,GAAGF,CAAQ,GAGxCK,EAAqBC,EAAQ,IAC5BP,GAAQ,QAESA,EAAO,OAAO,KAAKQ,GAASA,EAAM,OAASP,CAAQ,GACnD,UAAY,UACjC,CAACD,EAAQC,CAAQ,CAAC,EAGfQ,EAAeF,EAAQ,IAAM,CACjC,GAAI,CAACP,GAAUM,IAAa,UAAW,OAAO,KAE9C,IAAMI,EAASC,EAA0BL,EAAUN,EAAQK,CAAW,EACtE,OAAOO,EAAgBF,CAAM,CAC/B,EAAG,CAACV,EAAQM,EAAUD,CAAW,CAAC,EAElC,OACET,EAAAF,EAAA,CACE,UAAAC,EAAC,QACC,IAAI,YACJ,KAAK,gBACL,KAAMS,EACN,MAAM,gCACR,EACCK,GACCd,EAAC,UACC,KAAK,sBACL,wBAAyB,CAAE,OAAQc,CAAa,EAClD,EAEDV,GACH,CAEJ","names":["usePathname","useMemo","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema","Fragment","jsx","jsxs","OntoProvider","baseUrl","children","config","pathname","usePathname","cleanBaseUrl","markdownHref","fullPageUrl","pageType","useMemo","route","jsonLdSchema","schema","generateSchemaForPageType","serializeSchema"]}
|
|
1
|
+
{"version":3,"sources":["../src/OntoProvider.tsx","../src/schemas.ts"],"sourcesContent":["'use client';\r\n\r\nimport { usePathname } from 'next/navigation';\r\nimport { ReactNode, useMemo } from 'react';\r\nimport type { OntoConfig, PageType } from './config';\r\nimport { generateSchemaForPageType, serializeSchema } from './schemas';\r\n\r\nexport interface OntoProviderProps {\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n * Used to construct the full href for the AI discovery link tag.\r\n */\r\n baseUrl: string;\r\n /**\r\n * Child components to render\r\n */\r\n children: ReactNode;\r\n /**\r\n * Optional: Onto configuration for automatic JSON-LD schema injection\r\n * If provided, the provider will automatically inject JSON-LD schemas\r\n * based on the page type configuration\r\n */\r\n config?: OntoConfig;\r\n}\r\n\r\n/**\r\n * OntoProvider — Automatic AI Discovery Provider\r\n *\r\n * Wraps your application and automatically injects:\r\n * 1. `<link rel=\"alternate\">` tags for AI discovery\r\n * 2. JSON-LD structured data schemas based on page type\r\n *\r\n * With config, automatically generates JSON-LD schemas:\r\n * - 'scoring' pages get Methodology schema with AIO weights (40/35/25)\r\n * - 'about' pages get Organization/AboutPage schema\r\n *\r\n * Usage in a Next.js App Router layout:\r\n * ```tsx\r\n * import { OntoProvider } from '@ontosdk/next/provider';\r\n * import config from '../onto.config';\r\n *\r\n * export default function RootLayout({ children }) {\r\n * return (\r\n * <OntoProvider baseUrl=\"https://example.com\" config={config}>\r\n * <html>\r\n * <head />\r\n * <body>{children}</body>\r\n * </html>\r\n * </OntoProvider>\r\n * );\r\n * }\r\n * ```\r\n */\r\nexport function OntoProvider({ baseUrl, children, config }: OntoProviderProps) {\r\n const pathname = usePathname();\r\n\r\n // Construct the full URL with the current path and ?format=md query string\r\n const cleanBaseUrl = baseUrl.endsWith('/') ? 
baseUrl.slice(0, -1) : baseUrl;\r\n const markdownHref = `${cleanBaseUrl}${pathname}?format=md`;\r\n const fullPageUrl = `${cleanBaseUrl}${pathname}`;\r\n\r\n // Determine page type from config routes\r\n const pageType: PageType = useMemo(() => {\r\n if (!config?.routes) return 'default';\r\n\r\n const matchingRoute = config.routes.find(route => route.path === pathname);\r\n return matchingRoute?.pageType || 'default';\r\n }, [config, pathname]);\r\n\r\n // Generate JSON-LD schema based on page type\r\n const jsonLdSchema = useMemo(() => {\r\n if (!config || pageType === 'default') return null;\r\n\r\n const schema = generateSchemaForPageType(pageType, config, fullPageUrl);\r\n return serializeSchema(schema);\r\n }, [config, pageType, fullPageUrl]);\r\n\r\n return (\r\n <>\r\n <link\r\n rel=\"alternate\"\r\n type=\"text/markdown\"\r\n href={markdownHref}\r\n title=\"AI-optimized Markdown version\"\r\n />\r\n {jsonLdSchema && (\r\n <script\r\n type=\"application/ld+json\"\r\n dangerouslySetInnerHTML={{ __html: jsonLdSchema }}\r\n />\r\n )}\r\n {children}\r\n </>\r\n );\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - React Tax (Efficiency): 40% (Step 1)\r\n * - Semantic Richness: 35% (Step 2)\r\n * - Content Negotiation: 25% (Step 3)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: 
OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'React Tax (Token Efficiency)',\r\n text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high \"React Tax\" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic Richness',\r\n text: 'Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. 
Weight: 25%.',\r\n position: 3\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if (orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n 
}\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"aAEA,OAAS,eAAAA,MAAmB,kBAC5B,OAAoB,WAAAC,MAAe,QC4B5B,SAASC,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,6JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,+BACN,KAAM,8LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,oBACN,KAAM,qLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,sBACN,KAAM,yJACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX,CDpGI,mBAAAM,EACE,OAAAC,EADF,QAAAC,MAAA,oBAzBG,SAASC,EAAa,CAAE,QAAAC,EAAS,SAAAC,EAAU,OAAAC,CAAO,EAAsB,CAC7E,IAAMC,EAAWC,EAAY,EAGvBC,EAAeL,EAAQ,SAAS,GAAG,EAAIA,EAAQ
,MAAM,EAAG,EAAE,EAAIA,EAC9DM,EAAe,GAAGD,CAAY,GAAGF,CAAQ,aACzCI,EAAc,GAAGF,CAAY,GAAGF,CAAQ,GAGxCK,EAAqBC,EAAQ,IAC5BP,GAAQ,QAESA,EAAO,OAAO,KAAKQ,GAASA,EAAM,OAASP,CAAQ,GACnD,UAAY,UACjC,CAACD,EAAQC,CAAQ,CAAC,EAGfQ,EAAeF,EAAQ,IAAM,CACjC,GAAI,CAACP,GAAUM,IAAa,UAAW,OAAO,KAE9C,IAAMI,EAASC,EAA0BL,EAAUN,EAAQK,CAAW,EACtE,OAAOO,EAAgBF,CAAM,CAC/B,EAAG,CAACV,EAAQM,EAAUD,CAAW,CAAC,EAElC,OACET,EAAAF,EAAA,CACE,UAAAC,EAAC,QACC,IAAI,YACJ,KAAK,gBACL,KAAMS,EACN,MAAM,gCACR,EACCK,GACCd,EAAC,UACC,KAAK,sBACL,wBAAyB,CAAE,OAAQc,CAAa,EAClD,EAEDV,GACH,CAEJ","names":["usePathname","useMemo","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema","Fragment","jsx","jsxs","OntoProvider","baseUrl","children","config","pathname","usePathname","cleanBaseUrl","markdownHref","fullPageUrl","pageType","useMemo","route","jsonLdSchema","schema","generateSchemaForPageType","serializeSchema"]}
|
package/dist/index.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
"use strict";var
|
|
2
|
-
`)
|
|
1
|
+
"use strict";var L=Object.create;var c=Object.defineProperty;var P=Object.getOwnPropertyDescriptor;var R=Object.getOwnPropertyNames;var C=Object.getPrototypeOf,v=Object.prototype.hasOwnProperty;var I=(t,e)=>{for(var n in e)c(t,n,{get:e[n],enumerable:!0})},d=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let r of R(e))!v.call(t,r)&&r!==n&&c(t,r,{get:()=>e[r],enumerable:!(o=P(e,r))||o.enumerable});return t};var f=(t,e,n)=>(n=t!=null?L(C(t)):{},d(e||!t||!t.__esModule?c(n,"default",{value:t,enumerable:!0}):n,t)),M=t=>d(c({},"__esModule",{value:!0}),t);var j={};I(j,{extractContent:()=>x,generateAIOMethodologySchema:()=>l,generateAboutPageSchema:()=>g,generateLlmsTxt:()=>O,generateOrganizationSchema:()=>u,generateSchemaForPageType:()=>w,serializeSchema:()=>z});module.exports=M(j);var y=f(require("cheerio")),S=f(require("turndown")),H=new S.default({headingStyle:"atx",codeBlockStyle:"fenced"});function x(t,e="Generated Output"){let n=t.length,o=y.load(t),r=o("title").text()||o("h1").first().text()||"Untitled Page",p=o('meta[name="description"]').attr("content")||"No description found.",a=[];o('script[type="application/ld+json"]').each((h,T)=>{try{let b=o(T).html()||"",A=JSON.parse(b);a.push(A)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let s="";o("main").length>0?s=o("main").html()||"":o("article").length>0?s=o("article").html()||"":s=o("body").html()||"";let $=H.turndown(s),i=[`# ${r}`,`> ${p}`,"",`**Source:** ${e}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
|
|
2
|
+
`)+$;a.length>0&&(i+=`
|
|
3
3
|
|
|
4
4
|
---
|
|
5
5
|
## Structured Data (JSON-LD)
|
|
6
6
|
\`\`\`json
|
|
7
|
-
`,a.forEach(h=>{
|
|
8
|
-
`}),
|
|
7
|
+
`,a.forEach(h=>{i+=JSON.stringify(h,null,2)+`
|
|
8
|
+
`}),i+="```\n");let m=i.length,k=n>0?(n-m)/n*100:0;return{markdown:i,metadata:{title:r,description:p,jsonLd:a},stats:{originalHtmlSize:n,markdownSize:m,tokenReductionRatio:k}}}function O(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let n of t.routes){let o=`${t.baseUrl}${n.path}`;e.push(`- [${n.path}](${o}): ${n.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let n of t.externalLinks)n.description?e.push(`- [${n.title}](${n.url}): ${n.description}`):e.push(`- [${n.title}](${n.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let n of t.sections)e.push(`## ${n.heading}`),e.push(""),e.push(n.content),e.push("");return e.join(`
|
|
9
9
|
`).trim()+`
|
|
10
|
-
`}function
|
|
10
|
+
`}function l(t,e){return{"@context":"https://schema.org","@type":"HowTo",name:"AIO Score Calculation Methodology",description:"AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.",step:[{"@type":"HowToStep",name:"React Tax (Token Efficiency)",text:'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high "React Tax" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',position:1},{"@type":"HowToStep",name:"Semantic Richness",text:"Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.",position:2},{"@type":"HowToStep",name:"Content Negotiation",text:"Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. Weight: 25%.",position:3}]}}function u(t,e){if(!t.organization)return null;let n={"@context":"https://schema.org","@type":"Organization",name:t.organization.name};return t.organization.url&&(n.url=t.organization.url),t.organization.description&&(n.description=t.organization.description),t.organization.logo&&(n.logo=t.organization.logo),t.organization.foundingDate&&(n.foundingDate=t.organization.foundingDate),n}function g(t,e){let n=u(t,e),o={"@context":"https://schema.org","@type":"AboutPage",name:`About ${t.name}`,url:e};return t.summary&&(o.description=t.summary),n&&(o.mainEntity=n),o}function w(t,e,n){switch(t){case"scoring":return l(e,n);case"about":return g(e,n);default:return null}}function z(t){return t?JSON.stringify(t,null,2):null}0&&(module.exports={extractContent,generateAIOMethodologySchema,generateAboutPageSchema,generateLlmsTxt,generateOrganizationSchema,generateSchemaForPageType,serializeSchema});
|
|
11
11
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/extractor.ts","../src/config.ts","../src/schemas.ts"],"sourcesContent":["// We cannot use Webpack plugins reliably in Next.js Turbopack due to WorkerError restrictions.\r\n// Users must instead run `npx onto-next` as a postbuild script.\r\nexport { extractContent } from './extractor';\r\nexport { OntoConfig, OntoRoute, generateLlmsTxt } from './config';\r\nexport type { OntoConfig as OntoConfigType, OntoRoute as OntoRouteType, PageType } from './config';\r\nexport {\r\n generateAIOMethodologySchema,\r\n generateOrganizationSchema,\r\n generateAboutPageSchema,\r\n generateSchemaForPageType,\r\n serializeSchema\r\n} from './schemas';\r\nexport type {\r\n AIOMethodologySchema,\r\n OrganizationSchema,\r\n AboutPageSchema\r\n} from './schemas';\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. 
Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? 
((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute 
{\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n 
const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - Content Negotiation: 40% (30 points)\r\n * - React Tax / Token Efficiency: 35% (30 points)\r\n * - Structured Data: 25% (25 points)\r\n * - Semantic HTML: Bonus (15 points)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n 
name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on four key metrics.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Check if the site responds to Accept: text/markdown header. Weight: 40%. Penalty: -30 points if missing. This ensures AI bots receive optimized content instead of heavy HTML.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Token Efficiency (React Tax)',\r\n text: 'Measure the ratio of visible text to total HTML size. Weight: 35%. Penalty: -30 points if HTML > 100KB but text < 1KB. Detects JavaScript-heavy sites that are difficult for AI to parse.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Structured Data',\r\n text: 'Verify presence of JSON-LD structured data (Schema.org). Weight: 25%. Penalty: -25 points if missing. Enables AI to confidently extract pricing, products, and entities.',\r\n position: 3\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic HTML',\r\n text: 'Check for semantic tags like <main> and <article>. Bonus: +15 points if present. 
Helps AI agents separate navigation from core content.',\r\n position: 4\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if 
(orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"0jBAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,oBAAAE,EAAA,iCAAAC,EAAA,4BAAAC,EAAA,oBAAAC,EAAA,+BAAAC,EAAA,8BAAAC,EAAA,oBAAAC,IAAA,eAAAC,EAAAT,GCAA,IAAAU,EAAyB,sBACzBC,EAA4B,uBAEtBC,EAAkB,IAAI,EAAAC,QAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASC,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWf,EAAgB,SAASc,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAA
c,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CCtGO,SAASK,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,2JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,sBACN,KAAM,iLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,+BACN,KAAM,4LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,kBACN,KAAM,2KACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,gBACN,KAAM,0IACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,C
ACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX","names":["index_exports","__export","extractContent","generateAIOMethodologySchema","generateAboutPageSchema","generateLlmsTxt","generateOrganizationSchema","generateSchemaForPageType","serializeSchema","__toCommonJS","cheerio","import_turndown","turndownService","TurndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema"]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/extractor.ts","../src/config.ts","../src/schemas.ts"],"sourcesContent":["// We cannot use Webpack plugins reliably in Next.js Turbopack due to WorkerError restrictions.\r\n// Users must instead run `npx onto-next` as a postbuild script.\r\nexport { extractContent } from './extractor';\r\nexport { OntoConfig, OntoRoute, generateLlmsTxt } from './config';\r\nexport type { OntoConfig as OntoConfigType, OntoRoute as OntoRouteType, PageType } from './config';\r\nexport {\r\n generateAIOMethodologySchema,\r\n generateOrganizationSchema,\r\n generateAboutPageSchema,\r\n generateSchemaForPageType,\r\n serializeSchema\r\n} from './schemas';\r\nexport type {\r\n AIOMethodologySchema,\r\n OrganizationSchema,\r\n AboutPageSchema\r\n} from './schemas';\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. 
Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? 
((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute 
{\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n 
const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - React Tax (Efficiency): 40% (Step 1)\r\n * - Semantic Richness: 35% (Step 2)\r\n * - Content Negotiation: 25% (Step 3)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: 
number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'React Tax (Token Efficiency)',\r\n text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high \"React Tax\" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic Richness',\r\n text: 'Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. 
Weight: 25%.',\r\n position: 3\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if (orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n 
}\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"0jBAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,oBAAAE,EAAA,iCAAAC,EAAA,4BAAAC,EAAA,oBAAAC,EAAA,+BAAAC,EAAA,8BAAAC,EAAA,oBAAAC,IAAA,eAAAC,EAAAT,GCAA,IAAAU,EAAyB,sBACzBC,EAA4B,uBAEtBC,EAAkB,IAAI,EAAAC,QAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASC,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWf,EAAgB,SAASc,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EA
EtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CCvGO,SAASK,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,6JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,+BACN,KAAM,8LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,oBACN,KAAM,qLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,sBACN,KAAM,yJACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE
,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX","names":["index_exports","__export","extractContent","generateAIOMethodologySchema","generateAboutPageSchema","generateLlmsTxt","generateOrganizationSchema","generateSchemaForPageType","serializeSchema","__toCommonJS","cheerio","import_turndown","turndownService","TurndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema"]}
|
package/dist/index.mjs
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import*as
|
|
2
|
-
`)+d;
|
|
1
|
+
import*as g from"cheerio";import O from"turndown";var w=new O({headingStyle:"atx",codeBlockStyle:"fenced"});function z(t,n="Generated Output"){let e=t.length,o=g.load(t),s=o("title").text()||o("h1").first().text()||"Untitled Page",c=o('meta[name="description"]').attr("content")||"No description found.",i=[];o('script[type="application/ld+json"]').each((u,y)=>{try{let S=o(y).html()||"",x=JSON.parse(S);i.push(x)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let a="";o("main").length>0?a=o("main").html()||"":o("article").length>0?a=o("article").html()||"":a=o("body").html()||"";let d=w.turndown(a),r=[`# ${s}`,`> ${c}`,"",`**Source:** ${n}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
|
|
2
|
+
`)+d;i.length>0&&(r+=`
|
|
3
3
|
|
|
4
4
|
---
|
|
5
5
|
## Structured Data (JSON-LD)
|
|
6
6
|
\`\`\`json
|
|
7
|
-
`,
|
|
8
|
-
`}),
|
|
7
|
+
`,i.forEach(u=>{r+=JSON.stringify(u,null,2)+`
|
|
8
|
+
`}),r+="```\n");let l=r.length,f=e>0?(e-l)/e*100:0;return{markdown:r,metadata:{title:s,description:c,jsonLd:i},stats:{originalHtmlSize:e,markdownSize:l,tokenReductionRatio:f}}}function $(t){let n=[];if(n.push(`# ${t.name}`),n.push(""),n.push(`> ${t.summary}`),n.push(""),t.routes&&t.routes.length>0){n.push("## Key Routes"),n.push("");for(let e of t.routes){let o=`${t.baseUrl}${e.path}`;n.push(`- [${e.path}](${o}): ${e.description}`)}n.push("")}if(t.externalLinks&&t.externalLinks.length>0){n.push("## Resources"),n.push("");for(let e of t.externalLinks)e.description?n.push(`- [${e.title}](${e.url}): ${e.description}`):n.push(`- [${e.title}](${e.url})`);n.push("")}if(t.sections&&t.sections.length>0)for(let e of t.sections)n.push(`## ${e.heading}`),n.push(""),n.push(e.content),n.push("");return n.join(`
|
|
9
9
|
`).trim()+`
|
|
10
|
-
`}function
|
|
10
|
+
`}function p(t,n){return{"@context":"https://schema.org","@type":"HowTo",name:"AIO Score Calculation Methodology",description:"AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.",step:[{"@type":"HowToStep",name:"React Tax (Token Efficiency)",text:'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high "React Tax" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',position:1},{"@type":"HowToStep",name:"Semantic Richness",text:"Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.",position:2},{"@type":"HowToStep",name:"Content Negotiation",text:"Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. Weight: 25%.",position:3}]}}function m(t,n){if(!t.organization)return null;let e={"@context":"https://schema.org","@type":"Organization",name:t.organization.name};return t.organization.url&&(e.url=t.organization.url),t.organization.description&&(e.description=t.organization.description),t.organization.logo&&(e.logo=t.organization.logo),t.organization.foundingDate&&(e.foundingDate=t.organization.foundingDate),e}function h(t,n){let e=m(t,n),o={"@context":"https://schema.org","@type":"AboutPage",name:`About ${t.name}`,url:n};return t.summary&&(o.description=t.summary),e&&(o.mainEntity=e),o}function k(t,n,e){switch(t){case"scoring":return p(n,e);case"about":return h(n,e);default:return null}}function T(t){return t?JSON.stringify(t,null,2):null}export{z as extractContent,p as generateAIOMethodologySchema,h as generateAboutPageSchema,$ as generateLlmsTxt,m as generateOrganizationSchema,k as generateSchemaForPageType,T as serializeSchema};
|
|
11
11
|
//# sourceMappingURL=index.mjs.map
|
package/dist/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/extractor.ts","../src/config.ts","../src/schemas.ts"],"sourcesContent":["import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. 
Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? 
((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute 
{\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n 
const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - Content Negotiation: 40% (30 points)\r\n * - React Tax / Token Efficiency: 35% (30 points)\r\n * - Structured Data: 25% (25 points)\r\n * - Semantic HTML: Bonus (15 points)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n 
name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on four key metrics.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Check if the site responds to Accept: text/markdown header. Weight: 40%. Penalty: -30 points if missing. This ensures AI bots receive optimized content instead of heavy HTML.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Token Efficiency (React Tax)',\r\n text: 'Measure the ratio of visible text to total HTML size. Weight: 35%. Penalty: -30 points if HTML > 100KB but text < 1KB. Detects JavaScript-heavy sites that are difficult for AI to parse.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Structured Data',\r\n text: 'Verify presence of JSON-LD structured data (Schema.org). Weight: 25%. Penalty: -25 points if missing. Enables AI to confidently extract pricing, products, and entities.',\r\n position: 3\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic HTML',\r\n text: 'Check for semantic tags like <main> and <article>. Bonus: +15 points if present. 
Helps AI agents separate navigation from core content.',\r\n position: 4\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if 
(orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"AAAA,UAAYA,MAAa,UACzB,OAAOC,MAAqB,WAE5B,IAAMC,EAAkB,IAAID,EAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASE,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWd,EAAgB,SAASa,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAA
O,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CCtGO,SAASK,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,2JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,sBACN,KAAM,iLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,+BACN,KAAM,4LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,kBACN,KAAM,2KACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,gBACN,KAAM,0IACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAA
SO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX","names":["cheerio","TurndownService","turndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema"]}
|
|
1
|
+
{"version":3,"sources":["../src/extractor.ts","../src/config.ts","../src/schemas.ts"],"sourcesContent":["import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. 
Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? 
((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute 
{\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n 
const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n","/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - React Tax (Efficiency): 40% (Step 1)\r\n * - Semantic Richness: 35% (Step 2)\r\n * - Content Negotiation: 25% (Step 3)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: 
number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'React Tax (Token Efficiency)',\r\n text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high \"React Tax\" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic Richness',\r\n text: 'Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. 
Weight: 25%.',\r\n position: 3\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if (orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n 
}\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 2);\r\n}\r\n"],"mappings":"AAAA,UAAYA,MAAa,UACzB,OAAOC,MAAqB,WAE5B,IAAMC,EAAkB,IAAID,EAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASE,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWd,EAAgB,SAASa,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ
,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CCvGO,SAASK,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,6JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,+BACN,KAAM,8LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,oBACN,KAAM,qLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,sBACN,KAAM,yJACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX","names":["cheerio","TurndownServic
e","turndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema"]}
|
package/dist/middleware.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"use strict";var
|
|
1
|
+
"use strict";var c=Object.defineProperty;var C=Object.getOwnPropertyDescriptor;var T=Object.getOwnPropertyNames;var _=Object.prototype.hasOwnProperty;var $=(t,e)=>{for(var n in e)c(t,n,{get:e[n],enumerable:!0})},P=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let a of T(e))!_.call(t,a)&&a!==n&&c(t,a,{get:()=>e[a],enumerable:!(o=C(e,a))||o.enumerable});return t};var b=t=>P(c({},"__esModule",{value:!0}),t);var G={};$(G,{AI_BOT_USER_AGENTS:()=>y,matchBot:()=>p,ontoMiddleware:()=>k});module.exports=b(G);var d=require("next/server");var A=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"OpenAI",company:"OpenAI",addedAt:"2025-03-25"},{name:"GPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude",company:"Anthropic",addedAt:"2025-03-25"},{name:"Anthropic",company:"Anthropic",addedAt:"2025-03-25"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity",company:"Perplexity",addedAt:"2025-03-25"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Mistral",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-2
5"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"cohere-ai",company:"Cohere",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"}],y=I();function I(){return A.map(t=>t.name)}function p(t){if(!t)return;let e=t.toLowerCase(),n=A.filter(o=>e.includes(o.name.toLowerCase()));if(n.length!==0)return n.length===1?n[0]:n.reduce((o,a)=>a.name.length>o.name.length?a:o)}function g(t){let e=[];if(e.push(`# ${t.name}`),e.push(""),e.push(`> ${t.summary}`),e.push(""),t.routes&&t.routes.length>0){e.push("## Key Routes"),e.push("");for(let n of t.routes){let o=`${t.baseUrl}${n.path}`;e.push(`- [${n.path}](${o}): ${n.description}`)}e.push("")}if(t.externalLinks&&t.externalLinks.length>0){e.push("## Resources"),e.push("");for(let n of t.externalLinks)n.description?e.push(`- [${n.title}](${n.url}): ${n.description}`):e.push(`- [${n.title}](${n.url})`);e.push("")}if(t.sections&&t.sections.length>0)for(let n of t.sections)e.push(`## ${n.heading}`),e.push(""),e.push(n.content),e.push("");return e.join(`
|
|
2
2
|
`).trim()+`
|
|
3
|
-
`}async function
|
|
3
|
+
`}async function k(t,e){let n=t.headers.get("user-agent"),o=t.nextUrl.clone(),a=p(n),x=t.headers.get("accept")||"",l=t.nextUrl.searchParams.has("onto"),f=!!a,O=x.includes("text/markdown")||l;if(f||O){if(o.pathname.startsWith("/_next"))return d.NextResponse.next();if(o.pathname==="/llms.txt")try{if(e){let i=g(e),m=new d.NextResponse(i,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return a&&m.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),m.headers.set("X-Onto-Trace",n||"no-ua"),m}}catch(i){console.error("[Onto] Failed to generate llms.txt:",i)}if(o.pathname.includes("."))return d.NextResponse.next();let r=o.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let u=process.env.ONTO_API_KEY,B=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";u&&fetch(`${B}/api/track`,{method:"POST",headers:{"x-onto-key":u,"Content-Type":"application/json"},body:JSON.stringify({route:o.pathname,userAgent:n,bot:a?.name,company:a?.company})}).catch(()=>{}),o.pathname=`/.onto${r}.md`;let s=d.NextResponse.rewrite(o);return s.headers.set("Content-Type","text/markdown; charset=utf-8"),s.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),s.headers.set("X-Onto-Trace",n||"no-ua"),a&&s.headers.set("X-Onto-Bot",`${a.name} (${a.company})`),l&&s.headers.set("X-Onto-Debug","true"),s}let h=d.NextResponse.next();return h.headers.set("X-Onto-Trace",n||"no-ua"),h}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
|
|
4
4
|
//# sourceMappingURL=middleware.js.map
|
package/dist/middleware.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, 
-1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, 
grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'cohere-ai', company: 'Cohere', addedAt: '2025-01-01' },\n { 
name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? 
current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * 
Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,YAAiB,QAAS,SAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,CAC5E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCJO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,
EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAI7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIN,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFQ,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIW,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOZ,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASW,CAAW,MACnC,IAAMF,EAAW,eAAa,QAAQT,CAAG,EAGzC,OAAAS,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EACrDE,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAClFG,GAAeK,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAW,eAAa,KAAK,EACnC,OAAAA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX",
"names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
|
|
1
|
+
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, 
-1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, 
grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'OpenAI', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'GPT', company: 'OpenAI', addedAt: '2025-03-25' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Anthropic', company: 'Anthropic', addedAt: '2025-03-25' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity', company: 'Perplexity', addedAt: '2025-03-25' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n { name: 'Mistral', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen 
Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'cohere-ai', company: 'Cohere', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? 
current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * 
Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,UAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,SAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,MAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,SAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,YAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,aAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAChE,CAAE,KAAM,UAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,YAAiB,QAAS,SAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,CAC5E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCXO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM
,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAI7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMS,EAAiBC,EAAgBV,CAAM,EACvCW,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIN,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFQ,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIW,EAAcX,EAAI,UAClBW,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOZ,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASW,CAAW,MACnC,IAAMF,EAAW,eAAa,QAAQT,CAAG,EAGzC,OAAAS,EAAS,QAAQ,IAAI
,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EACrDE,GAASQ,EAAS,QAAQ,IAAI,aAAc,GAAGR,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAClFG,GAAeK,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAW,eAAa,KAAK,EACnC,OAAAA,EAAS,QAAQ,IAAI,eAAgBV,GAAa,OAAO,EAClDU,CACX","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
|
package/dist/middleware.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import{NextResponse as d}from"next/server";var u=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"cohere-ai",company:"Cohere",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"}],O=B();function B(){return u.map(n=>n.name)}function
|
|
1
|
+
import{NextResponse as d}from"next/server";var u=[{name:"GPTBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT-User",company:"OpenAI",addedAt:"2025-01-01"},{name:"OAI-SearchBot",company:"OpenAI",addedAt:"2025-01-01"},{name:"ChatGPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"OpenAI",company:"OpenAI",addedAt:"2025-03-25"},{name:"GPT",company:"OpenAI",addedAt:"2025-03-25"},{name:"Google-CloudVertexBot",company:"Google",addedAt:"2025-01-01"},{name:"Google-Extended",company:"Google",addedAt:"2025-01-01"},{name:"GoogleOther",company:"Google",addedAt:"2025-01-01"},{name:"ClaudeBot",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude-User",company:"Anthropic",addedAt:"2025-01-01"},{name:"anthropic-ai",company:"Anthropic",addedAt:"2025-01-01"},{name:"Claude",company:"Anthropic",addedAt:"2025-03-25"},{name:"Anthropic",company:"Anthropic",addedAt:"2025-03-25"},{name:"PerplexityBot",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity-User",company:"Perplexity",addedAt:"2025-01-01"},{name:"Perplexity",company:"Perplexity",addedAt:"2025-03-25"},{name:"Meta-ExternalAgent",company:"Meta",addedAt:"2025-01-01"},{name:"Meta-ExternalFetcher",company:"Meta",addedAt:"2025-01-01"},{name:"facebookexternalhit",company:"Meta",addedAt:"2025-03-25"},{name:"FacebookBot",company:"Meta",addedAt:"2025-01-01"},{name:"MistralBot",company:"Mistral",addedAt:"2025-03-25"},{name:"Mistral",company:"Mistral",addedAt:"2025-03-25"},{name:"Amazonbot",company:"Amazon",addedAt:"2025-03-25"},{name:"AI2Bot",company:"Allen Institute",addedAt:"2025-03-25"},{name:"DuckAssistBot",company:"DuckDuckGo",addedAt:"2025-03-25"},{name:"Diffbot",company:"Diffbot",addedAt:"2025-03-25"},{name:"CCBot",company:"Common Crawl",addedAt:"2025-01-01"},{name:"Bytespider",company:"ByteDance",addedAt:"2025-01-01"},{name:"Applebot-Extended",company:"Apple",addedAt:"2025-01-01"},{name:"cohere-ai",company:"Cohere",addedAt:"2025-01-01"},{name:"YouBot",company:"You.com",addedAt:"2025-01-01"}],O=B();function 
B(){return u.map(n=>n.name)}function m(n){if(!n)return;let t=n.toLowerCase(),e=u.filter(a=>t.includes(a.name.toLowerCase()));if(e.length!==0)return e.length===1?e[0]:e.reduce((a,o)=>o.name.length>a.name.length?o:a)}function A(n){let t=[];if(t.push(`# ${n.name}`),t.push(""),t.push(`> ${n.summary}`),t.push(""),n.routes&&n.routes.length>0){t.push("## Key Routes"),t.push("");for(let e of n.routes){let a=`${n.baseUrl}${e.path}`;t.push(`- [${e.path}](${a}): ${e.description}`)}t.push("")}if(n.externalLinks&&n.externalLinks.length>0){t.push("## Resources"),t.push("");for(let e of n.externalLinks)e.description?t.push(`- [${e.title}](${e.url}): ${e.description}`):t.push(`- [${e.title}](${e.url})`);t.push("")}if(n.sections&&n.sections.length>0)for(let e of n.sections)t.push(`## ${e.heading}`),t.push(""),t.push(e.content),t.push("");return t.join(`
|
|
2
2
|
`).trim()+`
|
|
3
|
-
`}async function
|
|
3
|
+
`}async function k(n,t){let e=n.headers.get("user-agent"),a=n.nextUrl.clone(),o=m(e),y=n.headers.get("accept")||"",c=n.nextUrl.searchParams.has("onto"),g=!!o,x=y.includes("text/markdown")||c;if(g||x){if(a.pathname.startsWith("/_next"))return d.next();if(a.pathname==="/llms.txt")try{if(t){let p=A(t),i=new d(p,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return o&&i.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),i.headers.set("X-Onto-Trace",e||"no-ua"),i}}catch(p){console.error("[Onto] Failed to generate llms.txt:",p)}if(a.pathname.includes("."))return d.next();let r=a.pathname;(r==="/"||r==="")&&(r="/index"),r.endsWith("/")&&r!=="/"&&(r=r.slice(0,-1));let h=process.env.ONTO_API_KEY,f=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";h&&fetch(`${f}/api/track`,{method:"POST",headers:{"x-onto-key":h,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:e,bot:o?.name,company:o?.company})}).catch(()=>{}),a.pathname=`/.onto${r}.md`;let s=d.rewrite(a);return s.headers.set("Content-Type","text/markdown; charset=utf-8"),s.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),s.headers.set("X-Onto-Trace",e||"no-ua"),o&&s.headers.set("X-Onto-Bot",`${o.name} (${o.company})`),c&&s.headers.set("X-Onto-Debug","true"),s}let l=d.next();return l.headers.set("X-Onto-Trace",e||"no-ua"),l}export{O as AI_BOT_USER_AGENTS,m as matchBot,k as ontoMiddleware};
|
|
4
4
|
//# sourceMappingURL=middleware.mjs.map
|
package/dist/middleware.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, 
-1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, 
grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'cohere-ai', company: 'Cohere', addedAt: '2025-01-01' },\n { 
name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? 
current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * 
Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,YAAiB,QAAS,SAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,CAC5E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCJO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,
EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOO,EAAa,KAAK,EAI7B,GAAIP,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMU,EAAiBC,EAAgBX,CAAM,EACvCY,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIP,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFS,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIX,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOO,EAAa,KAAK,EAI7B,IAAIK,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOb,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASY,CAAW,MACnC,IAAMF,EAAWH,EAAa,QAAQP,CAAG,EAGzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EACrDE,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAClFG,GAAeM,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAWH,EAAa,KAAK,EACnC,OAAAG,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AG
ENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
|
|
1
|
+
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: any, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n const hasDebugParam = request.nextUrl.searchParams.has('onto');\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown') || hasDebugParam;\r\n\r\n // Common logic for bot/markdown negotiation\r\n if (isAiBot || isMarkdownRequested) {\r\n // Ignore internal next.js requests\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n }\r\n }\r\n\r\n // Skip other static assets with dots\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') payloadPath = '/index';\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') payloadPath = payloadPath.slice(0, 
-1);\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // Logic for tracking and injection ... (fire-and-forget tracking)\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: { 'x-onto-key': ONTO_API_KEY, 'Content-Type': 'application/json' },\r\n body: JSON.stringify({ route: url.pathname, userAgent, bot: matched?.name, company: matched?.company })\r\n }).catch(() => {});\r\n }\r\n\r\n // Rewrite to semantic payload\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n const response = NextResponse.rewrite(url);\r\n \r\n // Apply headers\r\n response.headers.set('Content-Type', 'text/markdown; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n if (matched) response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n if (hasDebugParam) response.headers.set('X-Onto-Debug', 'true');\r\n\r\n return response;\r\n }\r\n\r\n // Default response for non-bots\r\n const response = NextResponse.next();\r\n response.headers.set('X-Onto-Trace', userAgent || 'no-ua');\r\n return response;\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n /** ISO date when this bot was added or last verified */\n addedAt?: string;\n}\n\n/**\n * Structured registry of all known AI bots, 
grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },\n { name: 'ChatGPT', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'OpenAI', company: 'OpenAI', addedAt: '2025-03-25' },\n { name: 'GPT', company: 'OpenAI', addedAt: '2025-03-25' },\n\n // Google (SEO Safety: Googlebot proper is EXCLUDED)\n { name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },\n { name: 'Google-Extended', company: 'Google', addedAt: '2025-01-01' },\n { name: 'GoogleOther', company: 'Google', addedAt: '2025-01-01' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },\n { name: 'Claude', company: 'Anthropic', addedAt: '2025-03-25' },\n { name: 'Anthropic', company: 'Anthropic', addedAt: '2025-03-25' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },\n { name: 'Perplexity', company: 'Perplexity', addedAt: '2025-03-25' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'Meta-ExternalFetcher', company: 'Meta', addedAt: '2025-01-01' },\n { name: 'facebookexternalhit', company: 'Meta', addedAt: '2025-03-25' },\n { name: 'FacebookBot', company: 'Meta', addedAt: '2025-01-01' },\n\n // Mistral\n { name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },\n { name: 'Mistral', company: 'Mistral', addedAt: '2025-03-25' },\n\n // Amazon\n { name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },\n\n // Others\n { name: 'AI2Bot', company: 'Allen 
Institute', addedAt: '2025-03-25' },\n { name: 'DuckAssistBot', company: 'DuckDuckGo', addedAt: '2025-03-25' },\n { name: 'Diffbot', company: 'Diffbot', addedAt: '2025-03-25' },\n { name: 'CCBot', company: 'Common Crawl', addedAt: '2025-01-01' },\n { name: 'Bytespider', company: 'ByteDance', addedAt: '2025-01-01' },\n { name: 'Applebot-Extended', company: 'Apple', addedAt: '2025-01-01' },\n { name: 'cohere-ai', company: 'Cohere', addedAt: '2025-01-01' },\n { name: 'YouBot', company: 'You.com', addedAt: '2025-01-01' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOT_USER_AGENTS_CACHE();\n\nfunction AI_BOT_USER_AGENTS_CACHE() {\n return AI_BOTS.map(bot => bot.name);\n}\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Uses a \"Longest Match\" strategy to ensure maximum specificity.\n * Comparison is case-insensitive.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n \n // Find all matches\n const matches = AI_BOTS.filter(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n\n if (matches.length === 0) return undefined;\n if (matches.length === 1) return matches[0];\n\n // Pick the longest match for maximum specificity (e.g. 'ChatGPT-User' vs 'GPT')\n return matches.reduce((longest, current) => \n current.name.length > longest.name.length ? 
current : longest\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * 
Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCkBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,SAAY,QAAS,YAAa,EACxE,CAAE,KAAM,eAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,gBAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,UAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,SAAoB,QAAS,SAAY,QAAS,YAAa,EACvE,CAAE,KAAM,MAAoB,QAAS,SAAY,QAAS,YAAa,EAGvE,CAAE,KAAM,wBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,kBAA2B,QAAS,SAAU,QAAS,YAAa,EAC5E,CAAE,KAAM,cAA2B,QAAS,SAAU,QAAS,YAAa,EAG5E,CAAE,KAAM,YAAmB,QAAS,YAAa,QAAS,YAAa,EACvE,CAAE,KAAM,cAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,eAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,SAAkB,QAAS,YAAa,QAAS,YAAa,EACtE,CAAE,KAAM,YAAkB,QAAS,YAAa,QAAS,YAAa,EAGtE,CAAE,KAAM,gBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,kBAAmB,QAAS,aAAc,QAAS,YAAa,EACxE,CAAE,KAAM,aAAmB,QAAS,aAAc,QAAS,YAAa,EAGxE,CAAE,KAAM,qBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,uBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,sBAAwB,QAAS,OAAQ,QAAS,YAAa,EACvE,CAAE,KAAM,cAAwB,QAAS,OAAQ,QAAS,YAAa,EAGvE,CAAE,KAAM,aAAc,QAAS,UAAW,QAAS,YAAa,EAChE,CAAE,KAAM,UAAc,QAAS,UAAW,QAAS,YAAa,EAGhE,CAAE,KAAM,YAAa,QAAS,SAAU,QAAS,YAAa,EAG9D,CAAE,KAAM,SAAiB,QAAS,kBAAmB,QAAS,YAAa,EAC3E,CAAE,KAAM,gBAAiB,QAAS,aAAc,QAAS,YAAa,EACtE,CAAE,KAAM,UAAiB,QAAS,UAAe,QAAS,YAAa,EACvE,CAAE,KAAM,QAAiB,QAAS,eAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,aAAiB,QAAS,YAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,oBAAqB,QAAS,QAAY,QAAS,YAAa,EACxE,CAAE,KAAM,YAAiB,QAAS,SAAgB,QAAS,YAAa,EACxE,CAAE,KAAM,SAAiB,QAAS,UAAgB,QAAS,YAAa,CAC5E,EAKaC,EAA+BC,EAAyB,EAErE,SAASA,GAA2B,CAChC,OAAOF,EAAQ,IAAIG,GAAOA,EAAI,IAAI,CACtC,CAOO,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EAGhCE,EAAUP,EAAQ,OAAOG,GAC3BG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,EAEA,GAAII,EAAQ,SAAW,EACvB,OAAIA,EAAQ,SAAW,EAAUA,EAAQ,CAAC,EAGnCA,EAAQ,OAAO,CAACC,EAASC,IAC5BA,EAAQ,KAAK,OAASD,EAAQ,KAAK,OAASC,EAAUD,CAC1D,CACJ,CCXO,SAASE,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO
,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlIA,eAAsBK,EAAeC,EAAcC,EAAqB,CACpE,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAC1CO,EAAgBP,EAAQ,QAAQ,aAAa,IAAI,MAAM,EAEvDQ,EAAU,CAAC,CAACJ,EACZK,EAAsBH,EAAO,SAAS,eAAe,GAAKC,EAGhE,GAAIC,GAAWC,EAAqB,CAEhC,GAAIN,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOO,EAAa,KAAK,EAI7B,GAAIP,EAAI,WAAa,YACjB,GAAI,CACA,GAAIF,EAAQ,CACR,IAAMU,EAAiBC,EAAgBX,CAAM,EACvCY,EAAW,IAAIH,EAAaC,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EACD,OAAIP,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EACtFS,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,CAC9D,CAIJ,GAAIX,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOO,EAAa,KAAK,EAI7B,IAAIK,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MAAIA,EAAc,UACzDA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAAKA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAG3F,IAAMC,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAEpDD,GAEA,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CAAE,aAAcD,EAAc,eAAgB,kBAAmB,EAC1E,KAAM,KAAK,UAAU,CAAE,MAAOb,EAAI,SAAU,UAAAD,EAAW,IAAKE,GAAS,KAAM,QAASA,GAAS,OAAQ,CAAC,CAC1G,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAIrBD,EAAI,SAAW,SAASY,CAAW,MACnC,IAAMF,EAAWH,EAAa,QAAQP,CAAG,EAGzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,8BAA8B,EACnEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACzGA,EAAS,QAAQ,IAAI
,eAAgBX,GAAa,OAAO,EACrDE,GAASS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAClFG,GAAeM,EAAS,QAAQ,IAAI,eAAgB,MAAM,EAEvDA,CACX,CAGA,IAAMA,EAAWH,EAAa,KAAK,EACnC,OAAAG,EAAS,QAAQ,IAAI,eAAgBX,GAAa,OAAO,EAClDW,CACX","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","AI_BOT_USER_AGENTS_CACHE","bot","matchBot","userAgent","lowerUA","matches","longest","current","generateLlmsTxt","config","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","hasDebugParam","isAiBot","isMarkdownRequested","NextResponse","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","ONTO_API_KEY","DASHBOARD_URL"]}
|
package/dist/schemas.d.mts
CHANGED
|
@@ -8,10 +8,9 @@ import { OntoConfig } from './config.mjs';
|
|
|
8
8
|
/**
|
|
9
9
|
* Standard AIO (AI Optimization) scoring methodology
|
|
10
10
|
* Based on the Onto scoring algorithm:
|
|
11
|
-
* -
|
|
12
|
-
* -
|
|
13
|
-
* -
|
|
14
|
-
* - Semantic HTML: Bonus (15 points)
|
|
11
|
+
* - React Tax (Efficiency): 40% (Step 1)
|
|
12
|
+
* - Semantic Richness: 35% (Step 2)
|
|
13
|
+
* - Content Negotiation: 25% (Step 3)
|
|
15
14
|
*/
|
|
16
15
|
interface AIOMethodologySchema {
|
|
17
16
|
'@context': 'https://schema.org';
|
package/dist/schemas.d.ts
CHANGED
|
@@ -8,10 +8,9 @@ import { OntoConfig } from './config.js';
|
|
|
8
8
|
/**
|
|
9
9
|
* Standard AIO (AI Optimization) scoring methodology
|
|
10
10
|
* Based on the Onto scoring algorithm:
|
|
11
|
-
* -
|
|
12
|
-
* -
|
|
13
|
-
* -
|
|
14
|
-
* - Semantic HTML: Bonus (15 points)
|
|
11
|
+
* - React Tax (Efficiency): 40% (Step 1)
|
|
12
|
+
* - Semantic Richness: 35% (Step 2)
|
|
13
|
+
* - Content Negotiation: 25% (Step 3)
|
|
15
14
|
*/
|
|
16
15
|
interface AIOMethodologySchema {
|
|
17
16
|
'@context': 'https://schema.org';
|
package/dist/schemas.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";var
|
|
1
|
+
"use strict";var i=Object.defineProperty;var c=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var m=Object.prototype.hasOwnProperty;var p=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},h=(t,e,n,o)=>{if(e&&typeof e=="object"||typeof e=="function")for(let a of u(e))!m.call(t,a)&&a!==n&&i(t,a,{get:()=>e[a],enumerable:!(o=c(e,a))||o.enumerable});return t};var l=t=>h(i({},"__esModule",{value:!0}),t);var f={};p(f,{generateAIOMethodologySchema:()=>r,generateAboutPageSchema:()=>g,generateOrganizationSchema:()=>s,generateSchemaForPageType:()=>d,serializeSchema:()=>y});module.exports=l(f);function r(t,e){return{"@context":"https://schema.org","@type":"HowTo",name:"AIO Score Calculation Methodology",description:"AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.",step:[{"@type":"HowToStep",name:"React Tax (Token Efficiency)",text:'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high "React Tax" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',position:1},{"@type":"HowToStep",name:"Semantic Richness",text:"Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.",position:2},{"@type":"HowToStep",name:"Content Negotiation",text:"Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. 
Weight: 25%.",position:3}]}}function s(t,e){if(!t.organization)return null;let n={"@context":"https://schema.org","@type":"Organization",name:t.organization.name};return t.organization.url&&(n.url=t.organization.url),t.organization.description&&(n.description=t.organization.description),t.organization.logo&&(n.logo=t.organization.logo),t.organization.foundingDate&&(n.foundingDate=t.organization.foundingDate),n}function g(t,e){let n=s(t,e),o={"@context":"https://schema.org","@type":"AboutPage",name:`About ${t.name}`,url:e};return t.summary&&(o.description=t.summary),n&&(o.mainEntity=n),o}function d(t,e,n){switch(t){case"scoring":return r(e,n);case"about":return g(e,n);default:return null}}function y(t){return t?JSON.stringify(t,null,2):null}0&&(module.exports={generateAIOMethodologySchema,generateAboutPageSchema,generateOrganizationSchema,generateSchemaForPageType,serializeSchema});
|
|
2
2
|
//# sourceMappingURL=schemas.js.map
|
package/dist/schemas.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/schemas.ts"],"sourcesContent":["/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * -
|
|
1
|
+
{"version":3,"sources":["../src/schemas.ts"],"sourcesContent":["/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - React Tax (Efficiency): 40% (Step 1)\r\n * - Semantic Richness: 35% (Step 2)\r\n * - Content Negotiation: 25% (Step 3)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'React Tax (Token Efficiency)',\r\n text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high \"React Tax\" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic Richness',\r\n text: 'Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. 
Essential for confident AI extraction.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. Weight: 25%.',\r\n position: 3\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: 
AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if (orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 
2);\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,kCAAAE,EAAA,4BAAAC,EAAA,+BAAAC,EAAA,8BAAAC,EAAA,oBAAAC,IAAA,eAAAC,EAAAP,GA+BO,SAASE,EACdM,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,6JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,+BACN,KAAM,8LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,oBACN,KAAM,qLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,sBACN,KAAM,yJACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASL,EACdI,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAME,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMF,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBE,EAAO,IAAMF,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBE,EAAO,YAAcF,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBE,EAAO,KAAOF,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBE,EAAO,aAAeF,EAAO,aAAa,cAGrCE,CACT,CAiBO,SAASP,EACdK,EACAC,EACiB,CACjB,IAAME,EAAYP,EAA2BI,EAAQC,CAAO,EAEtDC,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASF,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTE,EAAO,YAAcF,EAAO,SAG1BG,IACFD,EAAO,WAAaC,GAGfD,CACT,CAKO,SAASL,EACdO,EACAJ,EACAC,EACY,CACZ,OAAQG,EAAU,CAChB,IAAK,UACH,OAAOV,EAA6BM,EAAQC,CAAO,EACrD,IAAK,QACH,OAAON,EAAwBK,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASH,EAAgBI,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX","names":["schemas_exports","__export","generateAIOMethodologySchema","generateAboutPageSchema","generateOrganizationSchema","generateSchemaForPageType","serializeSchema","__toCommonJS","config","pageUrl","schema","orgSchema","pageType"]}
|
package/dist/schemas.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
function
|
|
1
|
+
/**
 * JSON-LD (Schema.org) generators - ESM build.
 *
 * Exports: generateAIOMethodologySchema, generateAboutPageSchema,
 * generateOrganizationSchema, generateSchemaForPageType, serializeSchema.
 *
 * NOTE(review): this is build output; editing it by hand leaves the adjacent
 * source map out of date. Port the serializeSchema change to the TypeScript
 * source and rebuild.
 */

/**
 * Build the Schema.org `HowTo` object describing the AIO score methodology.
 *
 * The content is static: three `HowToStep` entries weighted 40% / 35% / 25%.
 *
 * @param config  Accepted for signature compatibility; not read.
 * @param pageUrl Accepted for signature compatibility; not read.
 * @returns A new `HowTo` JSON-LD object on every call.
 */
function generateAIOMethodologySchema(config, pageUrl) {
  return {
    "@context": "https://schema.org",
    "@type": "HowTo",
    name: "AIO Score Calculation Methodology",
    description: "AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.",
    step: [
      {
        "@type": "HowToStep",
        name: "React Tax (Token Efficiency)",
        text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high "React Tax" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',
        position: 1
      },
      {
        "@type": "HowToStep",
        name: "Semantic Richness",
        text: "Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.",
        position: 2
      },
      {
        "@type": "HowToStep",
        name: "Content Negotiation",
        text: "Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. Weight: 25%.",
        position: 3
      }
    ]
  };
}

/**
 * Build a Schema.org `Organization` object from `config.organization`.
 *
 * @param config  Object whose optional `organization` field supplies `name`
 *                and the optional `url`, `description`, `logo`, `foundingDate`.
 * @param pageUrl Accepted for signature compatibility; not read.
 * @returns The organization object, or `null` when `config.organization` is
 *          falsy. Optional fields are copied only when truthy.
 */
function generateOrganizationSchema(config, pageUrl) {
  if (!config.organization) {
    return null;
  }
  let schema = {
    "@context": "https://schema.org",
    "@type": "Organization",
    name: config.organization.name
  };
  if (config.organization.url) {
    schema.url = config.organization.url;
  }
  if (config.organization.description) {
    schema.description = config.organization.description;
  }
  if (config.organization.logo) {
    schema.logo = config.organization.logo;
  }
  if (config.organization.foundingDate) {
    schema.foundingDate = config.organization.foundingDate;
  }
  return schema;
}

/**
 * Build a Schema.org `AboutPage` object for `pageUrl`.
 *
 * @param config  Supplies `name` (used in the "About <name>" title), optional
 *                `summary` (becomes `description`) and optional `organization`
 *                (becomes `mainEntity`).
 * @param pageUrl Stored verbatim as the page `url`.
 * @returns The `AboutPage` JSON-LD object.
 */
function generateAboutPageSchema(config, pageUrl) {
  let organization = generateOrganizationSchema(config, pageUrl);
  let schema = {
    "@context": "https://schema.org",
    "@type": "AboutPage",
    name: `About ${config.name}`,
    url: pageUrl
  };
  if (config.summary) {
    schema.description = config.summary;
  }
  if (organization) {
    schema.mainEntity = organization;
  }
  return schema;
}

/**
 * Pick the schema generator for a page type.
 *
 * @param pageType `"scoring"` or `"about"`; anything else yields `null`.
 * @param config   Forwarded to the selected generator.
 * @param pageUrl  Forwarded to the selected generator.
 * @returns The generated schema object, or `null` for unrecognized types.
 */
function generateSchemaForPageType(pageType, config, pageUrl) {
  switch (pageType) {
    case "scoring":
      return generateAIOMethodologySchema(config, pageUrl);
    case "about":
      return generateAboutPageSchema(config, pageUrl);
    default:
      return null;
  }
}

/**
 * Serialize a schema to JSON-LD text that is safe to place inside a
 * `<script type="application/ld+json">` element.
 *
 * Every `<` is emitted as the JSON escape `\u003c`, so a string value such as
 * `</script>` cannot terminate the embedding script element. The escape is
 * valid JSON: `JSON.parse` of the result yields the original values.
 *
 * @param schema Schema object, or a falsy value.
 * @returns Pretty-printed (2-space) JSON, or `null` when `schema` is falsy or
 *          `JSON.stringify` produces no string for it.
 */
function serializeSchema(schema) {
  if (!schema) {
    return null;
  }
  const json = JSON.stringify(schema, null, 2);
  // JSON.stringify yields undefined for values it cannot represent
  // (e.g. a function); report that as "nothing to emit".
  return typeof json === "string" ? json.replace(/</g, "\\u003c") : null;
}

export {
  generateAIOMethodologySchema,
  generateAboutPageSchema,
  generateOrganizationSchema,
  generateSchemaForPageType,
  serializeSchema
};
|
|
2
2
|
//# sourceMappingURL=schemas.mjs.map
|
package/dist/schemas.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/schemas.ts"],"sourcesContent":["/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * -
|
|
1
|
+
{"version":3,"sources":["../src/schemas.ts"],"sourcesContent":["/**\r\n * JSON-LD Schema generators for automatic structured data injection\r\n * Follows Schema.org standards for AI-friendly metadata\r\n */\r\n\r\nimport { OntoConfig } from './config';\r\n\r\n/**\r\n * Standard AIO (AI Optimization) scoring methodology\r\n * Based on the Onto scoring algorithm:\r\n * - React Tax (Efficiency): 40% (Step 1)\r\n * - Semantic Richness: 35% (Step 2)\r\n * - Content Negotiation: 25% (Step 3)\r\n */\r\nexport interface AIOMethodologySchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'HowTo';\r\n name: string;\r\n description: string;\r\n step: Array<{\r\n '@type': 'HowToStep';\r\n name: string;\r\n text: string;\r\n position: number;\r\n }>;\r\n}\r\n\r\n/**\r\n * Generate AIO Scoring Methodology JSON-LD schema\r\n * This explains to AI agents how the scoring system works\r\n */\r\nexport function generateAIOMethodologySchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AIOMethodologySchema {\r\n return {\r\n '@context': 'https://schema.org',\r\n '@type': 'HowTo',\r\n name: 'AIO Score Calculation Methodology',\r\n description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.',\r\n step: [\r\n {\r\n '@type': 'HowToStep',\r\n name: 'React Tax (Token Efficiency)',\r\n text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high \"React Tax\" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',\r\n position: 1\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Semantic Richness',\r\n text: 'Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. 
Essential for confident AI extraction.',\r\n position: 2\r\n },\r\n {\r\n '@type': 'HowToStep',\r\n name: 'Content Negotiation',\r\n text: 'Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. Weight: 25%.',\r\n position: 3\r\n }\r\n ]\r\n };\r\n}\r\n\r\n/**\r\n * Organization schema for About pages\r\n */\r\nexport interface OrganizationSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'Organization';\r\n name: string;\r\n url?: string;\r\n description?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n}\r\n\r\n/**\r\n * Generate Organization JSON-LD schema for About pages\r\n */\r\nexport function generateOrganizationSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): OrganizationSchema | null {\r\n if (!config.organization) {\r\n return null;\r\n }\r\n\r\n const schema: OrganizationSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'Organization',\r\n name: config.organization.name\r\n };\r\n\r\n if (config.organization.url) {\r\n schema.url = config.organization.url;\r\n }\r\n\r\n if (config.organization.description) {\r\n schema.description = config.organization.description;\r\n }\r\n\r\n if (config.organization.logo) {\r\n schema.logo = config.organization.logo;\r\n }\r\n\r\n if (config.organization.foundingDate) {\r\n schema.foundingDate = config.organization.foundingDate;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * AboutPage schema combining Organization and WebPage\r\n */\r\nexport interface AboutPageSchema {\r\n '@context': 'https://schema.org';\r\n '@type': 'AboutPage';\r\n name: string;\r\n url: string;\r\n description?: string;\r\n mainEntity?: OrganizationSchema;\r\n}\r\n\r\n/**\r\n * Generate AboutPage JSON-LD schema\r\n */\r\nexport function generateAboutPageSchema(\r\n config: OntoConfig,\r\n pageUrl: string\r\n): AboutPageSchema {\r\n const orgSchema = generateOrganizationSchema(config, pageUrl);\r\n\r\n const schema: 
AboutPageSchema = {\r\n '@context': 'https://schema.org',\r\n '@type': 'AboutPage',\r\n name: `About ${config.name}`,\r\n url: pageUrl\r\n };\r\n\r\n if (config.summary) {\r\n schema.description = config.summary;\r\n }\r\n\r\n if (orgSchema) {\r\n schema.mainEntity = orgSchema;\r\n }\r\n\r\n return schema;\r\n}\r\n\r\n/**\r\n * Determine which schema to generate based on page type\r\n */\r\nexport function generateSchemaForPageType(\r\n pageType: 'scoring' | 'about' | 'default',\r\n config: OntoConfig,\r\n pageUrl: string\r\n): any | null {\r\n switch (pageType) {\r\n case 'scoring':\r\n return generateAIOMethodologySchema(config, pageUrl);\r\n case 'about':\r\n return generateAboutPageSchema(config, pageUrl);\r\n case 'default':\r\n default:\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Serialize schema to JSON-LD script tag content\r\n */\r\nexport function serializeSchema(schema: any | null): string | null {\r\n if (!schema) {\r\n return null;\r\n }\r\n return JSON.stringify(schema, null, 
2);\r\n}\r\n"],"mappings":"AA+BO,SAASA,EACdC,EACAC,EACsB,CACtB,MAAO,CACL,WAAY,qBACZ,QAAS,QACT,KAAM,oCACN,YAAa,6JACb,KAAM,CACJ,CACE,QAAS,YACT,KAAM,+BACN,KAAM,8LACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,oBACN,KAAM,qLACN,SAAU,CACZ,EACA,CACE,QAAS,YACT,KAAM,sBACN,KAAM,yJACN,SAAU,CACZ,CACF,CACF,CACF,CAkBO,SAASC,EACdF,EACAC,EAC2B,CAC3B,GAAI,CAACD,EAAO,aACV,OAAO,KAGT,IAAMG,EAA6B,CACjC,WAAY,qBACZ,QAAS,eACT,KAAMH,EAAO,aAAa,IAC5B,EAEA,OAAIA,EAAO,aAAa,MACtBG,EAAO,IAAMH,EAAO,aAAa,KAG/BA,EAAO,aAAa,cACtBG,EAAO,YAAcH,EAAO,aAAa,aAGvCA,EAAO,aAAa,OACtBG,EAAO,KAAOH,EAAO,aAAa,MAGhCA,EAAO,aAAa,eACtBG,EAAO,aAAeH,EAAO,aAAa,cAGrCG,CACT,CAiBO,SAASC,EACdJ,EACAC,EACiB,CACjB,IAAMI,EAAYH,EAA2BF,EAAQC,CAAO,EAEtDE,EAA0B,CAC9B,WAAY,qBACZ,QAAS,YACT,KAAM,SAASH,EAAO,IAAI,GAC1B,IAAKC,CACP,EAEA,OAAID,EAAO,UACTG,EAAO,YAAcH,EAAO,SAG1BK,IACFF,EAAO,WAAaE,GAGfF,CACT,CAKO,SAASG,EACdC,EACAP,EACAC,EACY,CACZ,OAAQM,EAAU,CAChB,IAAK,UACH,OAAOR,EAA6BC,EAAQC,CAAO,EACrD,IAAK,QACH,OAAOG,EAAwBJ,EAAQC,CAAO,EAEhD,QACE,OAAO,IACX,CACF,CAKO,SAASO,EAAgBL,EAAmC,CACjE,OAAKA,EAGE,KAAK,UAAUA,EAAQ,KAAM,CAAC,EAF5B,IAGX","names":["generateAIOMethodologySchema","config","pageUrl","generateOrganizationSchema","schema","generateAboutPageSchema","orgSchema","generateSchemaForPageType","pageType","serializeSchema"]}
|
package/package.json
CHANGED
package/src/bots.ts
CHANGED
|
@@ -21,6 +21,9 @@ export const AI_BOTS: AiBot[] = [
|
|
|
21
21
|
{ name: 'GPTBot', company: 'OpenAI', addedAt: '2025-01-01' },
|
|
22
22
|
{ name: 'ChatGPT-User', company: 'OpenAI', addedAt: '2025-01-01' },
|
|
23
23
|
{ name: 'OAI-SearchBot', company: 'OpenAI', addedAt: '2025-01-01' },
|
|
24
|
+
{ name: 'ChatGPT', company: 'OpenAI', addedAt: '2025-03-25' },
|
|
25
|
+
{ name: 'OpenAI', company: 'OpenAI', addedAt: '2025-03-25' },
|
|
26
|
+
{ name: 'GPT', company: 'OpenAI', addedAt: '2025-03-25' },
|
|
24
27
|
|
|
25
28
|
// Google (SEO Safety: Googlebot proper is EXCLUDED)
|
|
26
29
|
{ name: 'Google-CloudVertexBot', company: 'Google', addedAt: '2025-01-01' },
|
|
@@ -31,10 +34,13 @@ export const AI_BOTS: AiBot[] = [
|
|
|
31
34
|
{ name: 'ClaudeBot', company: 'Anthropic', addedAt: '2025-01-01' },
|
|
32
35
|
{ name: 'Claude-User', company: 'Anthropic', addedAt: '2025-01-01' },
|
|
33
36
|
{ name: 'anthropic-ai', company: 'Anthropic', addedAt: '2025-01-01' },
|
|
37
|
+
{ name: 'Claude', company: 'Anthropic', addedAt: '2025-03-25' },
|
|
38
|
+
{ name: 'Anthropic', company: 'Anthropic', addedAt: '2025-03-25' },
|
|
34
39
|
|
|
35
40
|
// Perplexity
|
|
36
41
|
{ name: 'PerplexityBot', company: 'Perplexity', addedAt: '2025-01-01' },
|
|
37
42
|
{ name: 'Perplexity-User', company: 'Perplexity', addedAt: '2025-01-01' },
|
|
43
|
+
{ name: 'Perplexity', company: 'Perplexity', addedAt: '2025-03-25' },
|
|
38
44
|
|
|
39
45
|
// Meta
|
|
40
46
|
{ name: 'Meta-ExternalAgent', company: 'Meta', addedAt: '2025-01-01' },
|
|
@@ -44,6 +50,7 @@ export const AI_BOTS: AiBot[] = [
|
|
|
44
50
|
|
|
45
51
|
// Mistral
|
|
46
52
|
{ name: 'MistralBot', company: 'Mistral', addedAt: '2025-03-25' },
|
|
53
|
+
{ name: 'Mistral', company: 'Mistral', addedAt: '2025-03-25' },
|
|
47
54
|
|
|
48
55
|
// Amazon
|
|
49
56
|
{ name: 'Amazonbot', company: 'Amazon', addedAt: '2025-03-25' },
|
package/src/schemas.ts
CHANGED
|
@@ -8,10 +8,9 @@ import { OntoConfig } from './config';
|
|
|
8
8
|
/**
|
|
9
9
|
* Standard AIO (AI Optimization) scoring methodology
|
|
10
10
|
* Based on the Onto scoring algorithm:
|
|
11
|
-
* -
|
|
12
|
-
* -
|
|
13
|
-
* -
|
|
14
|
-
* - Semantic HTML: Bonus (15 points)
|
|
11
|
+
* - React Tax (Efficiency): 40% (Step 1)
|
|
12
|
+
* - Semantic Richness: 35% (Step 2)
|
|
13
|
+
* - Content Negotiation: 25% (Step 3)
|
|
15
14
|
*/
|
|
16
15
|
export interface AIOMethodologySchema {
|
|
17
16
|
'@context': 'https://schema.org';
|
|
@@ -38,31 +37,25 @@ export function generateAIOMethodologySchema(
|
|
|
38
37
|
'@context': 'https://schema.org',
|
|
39
38
|
'@type': 'HowTo',
|
|
40
39
|
name: 'AIO Score Calculation Methodology',
|
|
41
|
-
description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on
|
|
40
|
+
description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on three core pillars.',
|
|
42
41
|
step: [
|
|
43
42
|
{
|
|
44
43
|
'@type': 'HowToStep',
|
|
45
|
-
name: '
|
|
46
|
-
text: '
|
|
44
|
+
name: 'React Tax (Token Efficiency)',
|
|
45
|
+
text: 'Measures the ratio of useful content to total page weight. Weight: 40%. Sites with high "React Tax" (heavy JS/HTML/CSS noise) score lower as they consume more tokens for less information.',
|
|
47
46
|
position: 1
|
|
48
47
|
},
|
|
49
48
|
{
|
|
50
49
|
'@type': 'HowToStep',
|
|
51
|
-
name: '
|
|
52
|
-
text: '
|
|
50
|
+
name: 'Semantic Richness',
|
|
51
|
+
text: 'Evaluates the presence of structured metadata (JSON-LD), semantic HTML tags (<main>, <article>), and proper heading hierarchy. Weight: 35%. Essential for confident AI extraction.',
|
|
53
52
|
position: 2
|
|
54
53
|
},
|
|
55
54
|
{
|
|
56
55
|
'@type': 'HowToStep',
|
|
57
|
-
name: '
|
|
58
|
-
text: '
|
|
56
|
+
name: 'Content Negotiation',
|
|
57
|
+
text: 'Tests whether your server can negotiate and serve optimized Markdown payloads directly to AI agents via the Accept: text/markdown header. Weight: 25%.',
|
|
59
58
|
position: 3
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
'@type': 'HowToStep',
|
|
63
|
-
name: 'Semantic HTML',
|
|
64
|
-
text: 'Check for semantic tags like <main> and <article>. Bonus: +15 points if present. Helps AI agents separate navigation from core content.',
|
|
65
|
-
position: 4
|
|
66
59
|
}
|
|
67
60
|
]
|
|
68
61
|
};
|