@ontosdk/next 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ONTOROVIDER_USAGE.md +42 -83
- package/dist/middleware.d.mts +2 -1
- package/dist/middleware.d.ts +2 -1
- package/dist/middleware.js +3 -3
- package/dist/middleware.js.map +1 -1
- package/dist/middleware.mjs +3 -3
- package/dist/middleware.mjs.map +1 -1
- package/package.json +1 -1
- package/src/middleware.ts +11 -9
package/ONTOROVIDER_USAGE.md
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# OntoProvider Usage Guide
|
|
2
2
|
|
|
3
|
-
The `OntoProvider` component
|
|
3
|
+
The `OntoProvider` component is the high-level, "Zero-Config" entry point for AI optimization. It handles both **Auto-Discovery** (link tags) and **Semantic SEO** (JSON-LD schemas) in one wrapper.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
- **Automatic Link Tag Injection**:
|
|
8
|
-
- **
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
7
|
+
- **Automatic Link Tag Injection**: Adds `<link rel="alternate" type="text/markdown">` to every page.
|
|
8
|
+
- **JSON-LD Schema Injection**: Automatically generates structured data (Schema.org) for pages based on your `onto.config.ts`.
|
|
9
|
+
- **Dynamic Path Detection**: Uses `usePathname()` to construct full URLs for AI agents.
|
|
10
|
+
- **AIO Scoring Methodology**: Automatically injects methodology schemas for routes marked as `pageType: 'scoring'`.
|
|
11
11
|
|
|
12
12
|
## Installation
|
|
13
13
|
|
|
@@ -17,20 +17,17 @@ npm install @ontosdk/next
|
|
|
17
17
|
|
|
18
18
|
## Usage
|
|
19
19
|
|
|
20
|
-
### Basic
|
|
20
|
+
### 1. Basic Setup in Root Layout
|
|
21
21
|
|
|
22
|
-
Wrap your
|
|
22
|
+
Wrap your application in `OntoProvider` and provide your site's `baseUrl`. For best results, also pass the config object exported from your `onto.config.ts` via the `config` prop.
|
|
23
23
|
|
|
24
24
|
```tsx
|
|
25
25
|
import { OntoProvider } from '@ontosdk/next/provider';
|
|
26
|
+
import config from '../onto.config';
|
|
26
27
|
|
|
27
|
-
export default function RootLayout({
|
|
28
|
-
children,
|
|
29
|
-
}: {
|
|
30
|
-
children: React.ReactNode;
|
|
31
|
-
}) {
|
|
28
|
+
export default function RootLayout({ children }) {
|
|
32
29
|
return (
|
|
33
|
-
<OntoProvider baseUrl="https://example.com">
|
|
30
|
+
<OntoProvider baseUrl="https://example.com" config={config}>
|
|
34
31
|
<html lang="en">
|
|
35
32
|
<head />
|
|
36
33
|
<body>{children}</body>
|
|
@@ -40,90 +37,52 @@ export default function RootLayout({
|
|
|
40
37
|
}
|
|
41
38
|
```
|
|
42
39
|
|
|
43
|
-
###
|
|
40
|
+
### 2. Page Type Configuration
|
|
44
41
|
|
|
45
|
-
|
|
42
|
+
In your `onto.config.ts`, you can set a `pageType` on each route to enable automatic schema generation for that route:
|
|
46
43
|
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
</html>
|
|
64
|
-
);
|
|
65
|
-
}
|
|
44
|
+
```typescript
|
|
45
|
+
const config: OntoConfig = {
|
|
46
|
+
// ... basic info
|
|
47
|
+
routes: [
|
|
48
|
+
{
|
|
49
|
+
path: '/pricing',
|
|
50
|
+
description: 'Pricing & Methodology',
|
|
51
|
+
pageType: 'scoring' // Triggers AIOMethodologySchema (AIO Scoring)
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
path: '/about',
|
|
55
|
+
description: 'About Us',
|
|
56
|
+
pageType: 'about' // Triggers Organization/AboutPage Schema
|
|
57
|
+
}
|
|
58
|
+
]
|
|
59
|
+
};
|
|
66
60
|
```
|
|
67
61
|
|
|
68
62
|
## How It Works
|
|
69
63
|
|
|
70
|
-
|
|
71
|
-
|
|
64
|
+
### Auto-Discovery
|
|
65
|
+
For a page at `/docs`, the provider injects:
|
|
72
66
|
```html
|
|
73
|
-
<link
|
|
74
|
-
rel="alternate"
|
|
75
|
-
type="text/markdown"
|
|
76
|
-
href="https://example.com/blog/my-post?format=md"
|
|
77
|
-
title="AI-optimized Markdown version"
|
|
78
|
-
/>
|
|
67
|
+
<link rel="alternate" type="text/markdown" href="https://example.com/docs?format=md" />
|
|
79
68
|
```
|
|
80
69
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
## Props
|
|
84
|
-
|
|
85
|
-
| Prop | Type | Required | Description |
|
|
86
|
-
|------|------|----------|-------------|
|
|
87
|
-
| `baseUrl` | `string` | Yes | The base URL of your site (e.g., `'https://example.com'`) |
|
|
88
|
-
| `children` | `ReactNode` | Yes | Child components to render |
|
|
70
|
+
### Semantic SEO (JSON-LD)
|
|
71
|
+
If the current route is assigned a `pageType` (other than `'default'`) in your config, the provider injects a `<script type="application/ld+json">` tag with highly specific, AI-friendly metadata for that page type. For example, a `scoring` page receives the **Onto AIO Methodology** schema, which explains your content negotiation and token efficiency weights to LLM crawlers.
|
|
89
72
|
|
|
90
73
|
## Comparison with OntoHead
|
|
91
74
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
-
|
|
75
|
+
| Feature | OntoHead | OntoProvider |
|
|
76
|
+
| :--- | :--- | :--- |
|
|
77
|
+
| **Path Pattern** | Static `/.onto/*.md` | Dynamic `?format=md` |
|
|
78
|
+
| **JSON-LD** | No | **Yes** |
|
|
79
|
+
| **Manifests** | Injects `llms.txt` link | No (handled by Middleware) |
|
|
80
|
+
| **Best For** | Static exports (CLI) | Dynamic & metadata-heavy sites |
|
|
96
81
|
|
|
97
|
-
|
|
98
|
-
- Uses dynamic query string pattern (e.g., `/blog/my-post?format=md`)
|
|
99
|
-
- Works with dynamic content
|
|
100
|
-
- Best for apps that handle markdown conversion at request time
|
|
82
|
+
## Requirements
|
|
101
83
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
Combine with Next.js middleware to handle the `?format=md` requests:
|
|
105
|
-
|
|
106
|
-
```typescript
|
|
107
|
-
// middleware.ts
|
|
108
|
-
import { NextResponse } from 'next/server';
|
|
109
|
-
import type { NextRequest } from 'next/server';
|
|
110
|
-
|
|
111
|
-
export function middleware(request: NextRequest) {
|
|
112
|
-
const { searchParams, pathname } = request.nextUrl;
|
|
113
|
-
|
|
114
|
-
if (searchParams.get('format') === 'md') {
|
|
115
|
-
// Convert the page to markdown and return it
|
|
116
|
-
// Implementation depends on your setup
|
|
117
|
-
return convertToMarkdown(pathname);
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
return NextResponse.next();
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
export const config = {
|
|
124
|
-
matcher: '/:path*',
|
|
125
|
-
};
|
|
126
|
-
```
|
|
84
|
+
- Next.js ≥14.0.0
|
|
85
|
+
- React ^18.0.0 || ^19.0.0
|
|
127
86
|
|
|
128
87
|
## License
|
|
129
88
|
|
package/dist/middleware.d.mts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { NextRequest, NextResponse } from 'next/server';
|
|
2
|
+
import { OntoConfig } from './config.mjs';
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Comprehensive registry of AI bot user-agent strings.
|
|
@@ -20,6 +21,6 @@ declare const AI_BOT_USER_AGENTS: string[];
|
|
|
20
21
|
*/
|
|
21
22
|
declare function matchBot(userAgent: string | null): AiBot | undefined;
|
|
22
23
|
|
|
23
|
-
declare function ontoMiddleware(request: NextRequest): Promise<NextResponse<unknown>>;
|
|
24
|
+
declare function ontoMiddleware(request: NextRequest, config?: OntoConfig): Promise<NextResponse<unknown>>;
|
|
24
25
|
|
|
25
26
|
export { AI_BOT_USER_AGENTS, type AiBot, matchBot, ontoMiddleware };
|
package/dist/middleware.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { NextRequest, NextResponse } from 'next/server';
|
|
2
|
+
import { OntoConfig } from './config.js';
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Comprehensive registry of AI bot user-agent strings.
|
|
@@ -20,6 +21,6 @@ declare const AI_BOT_USER_AGENTS: string[];
|
|
|
20
21
|
*/
|
|
21
22
|
declare function matchBot(userAgent: string | null): AiBot | undefined;
|
|
22
23
|
|
|
23
|
-
declare function ontoMiddleware(request: NextRequest): Promise<NextResponse<unknown>>;
|
|
24
|
+
declare function ontoMiddleware(request: NextRequest, config?: OntoConfig): Promise<NextResponse<unknown>>;
|
|
24
25
|
|
|
25
26
|
export { AI_BOT_USER_AGENTS, type AiBot, matchBot, ontoMiddleware };
|
package/dist/middleware.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
"use strict";var
|
|
1
|
+
"use strict";var u=Object.defineProperty;var T=Object.getOwnPropertyDescriptor;var k=Object.getOwnPropertyNames;var b=Object.prototype.hasOwnProperty;var R=(e,t)=>{for(var o in t)u(e,o,{get:t[o],enumerable:!0})},_=(e,t,o,a)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of k(t))!b.call(e,n)&&n!==o&&u(e,n,{get:()=>t[n],enumerable:!(a=T(t,n))||a.enumerable});return e};var P=e=>_(u({},"__esModule",{value:!0}),e);var G={};R(G,{AI_BOT_USER_AGENTS:()=>x,matchBot:()=>m,ontoMiddleware:()=>U});module.exports=P(G);var c=require("next/server");var f=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],x=f.map(e=>e.name);function m(e){if(!e)return;let t=e.toLowerCase();return f.find(o=>t.includes(o.name.toLowerCase()))}async function O(){try{let e=await import(process.cwd()+"/onto.config");return e.default||e}catch{return null}}function C(e){let t=[];if(t.push(`# ${e.name}`),t.push(""),t.push(`> ${e.summary}`),t.push(""),e.routes&&e.routes.length>0){t.push("## Key Routes"),t.push("");for(let o of e.routes){let a=`${e.baseUrl}${o.path}`;t.push(`- [${o.path}](${a}): ${o.description}`)}t.push("")}if(e.externalLinks&&e.externalLinks.length>0){t.push("## Resources"),t.push("");for(let o of e.externalLinks)o.description?t.push(`- 
[${o.title}](${o.url}): ${o.description}`):t.push(`- [${o.title}](${o.url})`);t.push("")}if(e.sections&&e.sections.length>0)for(let o of e.sections)t.push(`## ${o.heading}`),t.push(""),t.push(o.content),t.push("");return t.join(`
|
|
2
2
|
`).trim()+`
|
|
3
|
-
`}async function U(e){let
|
|
3
|
+
`}async function U(e,t){let o=e.headers.get("user-agent"),a=e.nextUrl.clone(),n=m(o),$=e.headers.get("accept")||"",A=!!n,B=$.includes("text/markdown");if(A||B){if(a.pathname.startsWith("/_next"))return c.NextResponse.next();if(a.pathname==="/llms.txt")try{let i=t||await O();if(i){let r=C(i),p=new c.NextResponse(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&p.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),p}else{a.pathname="/llms.txt";let r=c.NextResponse.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(i){console.error("[Onto] Failed to generate llms.txt:",i),a.pathname="/llms.txt";let r=c.NextResponse.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}if(a.pathname.includes("."))return c.NextResponse.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let h={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(h["X-Onto-Bot"]=`${n.name} (${n.company})`);let l=process.env.ONTO_API_KEY,d=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(l){fetch(`${d}/api/track`,{method:"POST",headers:{"x-onto-key":l,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let i=await fetch(`${d}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":l},signal:AbortSignal.timeout(1500)});if(i.ok){let{injection:r}=await i.json();if(r){let p=`${a.origin}/.onto${s}.md`,y=await fetch(p);if(y.ok){let w=`${await y.text()}
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
${r}`;return new c.NextResponse(
|
|
7
|
+
${r}`;return new c.NextResponse(w,{headers:{...h,"X-Onto-Injected":"true"}})}}}}catch(i){console.error("[Onto] Injection failed",i)}}a.pathname=`/.onto${s}.md`;let g=c.NextResponse.rewrite(a);return n&&g.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),g}return c.NextResponse.next()}0&&(module.exports={AI_BOT_USER_AGENTS,matchBot,ontoMiddleware});
|
|
8
8
|
//# sourceMappingURL=middleware.js.map
|
package/dist/middleware.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nimport { loadOntoConfig, generateLlmsTxt } from './config';\r\n\r\nexport async function ontoMiddleware(request: NextRequest) {\r\n const userAgent = request.headers.get('user-agent');\r\n const accept = request.headers.get('accept') || '';\r\n\r\n const matched = matchBot(userAgent);\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown');\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n const url = request.nextUrl.clone();\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Dynamically generate llms.txt from onto.config.ts\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n const config = await loadOntoConfig();\r\n\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', 
`${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking — includes structured bot info\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. 
Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 
'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * 
Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Load the onto.config.ts file from the user's project\r\n * This is used by the middleware to dynamically generate llms.txt\r\n */\r\nexport async function loadOntoConfig(): Promise<OntoConfig | null> {\r\n try {\r\n // Try to dynamically import the config file from the user's project root\r\n // This runs in the middleware context, so we look in the project root\r\n const config = await import(process.cwd() + '/onto.config');\r\n return 
config.default || config;\r\n } catch (error) {\r\n // Config file doesn't exist or failed to load\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCiBA,eAAsBI,GAA6C,CACjE,GAAI,CAGF,IAAMC,EAAS,MAAM,OAAO,QAAQ,IAAI,EAAI,gBAC5C,OAAOA,EAAO,SAAWA,CAC3B,MAAgB,CAEd,OAAO,IACT,CACF,CASO,SAASC,EAAgBD,EAA4B,CAC1D,IAAME,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKF,EAAO,IAAI,EAAE,EAC7BE,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKF,EAAO,OAAO,EAAE,EAChCE,EAAM,KAAK,EAAE,EAGTF,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CE,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASH,EAAO,OAAQ,CACjC,IAAMI,EAAU,GAAGJ,EAAO,OAAO,GAAGG,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DE,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQL,EAAO,cACpBK,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWM,KAAWN,EAAO,SAC3BE,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK
;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlJA,eAAsBK,EAAeC,EAAsB,CACvD,IAAMC,EAAYD,EAAQ,QAAQ,IAAI,YAAY,EAC5CE,EAASF,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAE1CG,EAAUC,EAASH,CAAS,EAC5BI,EAAU,CAAC,CAACF,EACZG,EAAsBJ,EAAO,SAAS,eAAe,EAG3D,GAAIG,GAAWC,EAAqB,CAChC,IAAMC,EAAMP,EAAQ,QAAQ,MAAM,EAGlC,GAAIO,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAK7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CACA,IAAMC,EAAS,MAAMC,EAAe,EAEpC,GAAID,EAAQ,CAER,IAAME,EAAiBC,EAAgBH,CAAM,EACvCI,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIP,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtES,CACX,KAAO,CAEHL,EAAI,SAAW,YACf,IAAMK,EAAW,eAAa,QAAQL,CAAG,EACzC,OAAAK,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DN,EAAI,SAAW,YACf,IAAMK,EAAW,eAAa,QAAQL,CAAG,EACzC,OAAAK,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CAIJ,GAAIL,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIO,EAAcP,EAAI,UAClBO,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIZ,IACAY,EAAW,YAAY,EAAI,GAAGZ,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAIlE,IAAMa,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOT,EAAI,SACX,UAAWN,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMe,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBV,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAcS,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGb,EAAI,MAAM,SAASO,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,G
AE5D,OAAO,IAAI,eAAaG,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGAhB,EAAI,SAAW,SAASO,CAAW,MAGnC,IAAMF,EAAW,eAAa,QAAQL,CAAG,EACzC,OAAIJ,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CAEA,OAAO,eAAa,KAAK,CAC7B","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","loadOntoConfig","config","generateLlmsTxt","lines","route","fullUrl","link","section","ontoMiddleware","request","userAgent","accept","matched","matchBot","isAiBot","isMarkdownRequested","url","config","loadOntoConfig","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
|
|
1
|
+
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { loadOntoConfig, generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: NextRequest, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown');\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n // Use provided config or try to load dynamic one\r\n const activeConfig = config || await loadOntoConfig();\r\n\r\n if (activeConfig) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(activeConfig);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, 
stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking — includes structured bot info\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? 
matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 
'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of 
what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Load the onto.config.ts file from the user's project\r\n * This is used by the middleware to dynamically generate llms.txt\r\n */\r\nexport async function loadOntoConfig(): Promise<OntoConfig | null> {\r\n try {\r\n // Try to dynamically import the config file from the user's project root\r\n // This runs in the middleware context, so we look in the project root\r\n const 
config = await import(process.cwd() + '/onto.config');\r\n return config.default || config;\r\n } catch (error) {\r\n // Config file doesn't exist or failed to load\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,wBAAAE,EAAA,aAAAC,EAAA,mBAAAC,IAAA,eAAAC,EAAAL,GAAA,IAAAM,EAA0C,uBCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCiBA,eAAsBI,GAA6C,CACjE,GAAI,CAGF,IAAMC,EAAS,MAAM,OAAO,QAAQ,IAAI,EAAI,gBAC5C,OAAOA,EAAO,SAAWA,CAC3B,MAAgB,CAEd,OAAO,IACT,CACF,CASO,SAASC,EAAgBD,EAA4B,CAC1D,IAAME,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKF,EAAO,IAAI,EAAE,EAC7BE,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKF,EAAO,OAAO,EAAE,EAChCE,EAAM,KAAK,EAAE,EAGTF,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CE,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASH,EAAO,OAAQ,CACjC,IAAMI,EAAU,GAAGJ,EAAO,OAAO,GAAGG,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DE,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQL,EAAO,cACpBK,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWM,KAAWN,EAAO,SAC3BE,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK
;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlJA,eAAsBK,EAAeC,EAAsBC,EAAqB,CAC5E,IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAE1CO,EAAU,CAAC,CAACH,EACZI,EAAsBF,EAAO,SAAS,eAAe,EAG3D,GAAIC,GAAWC,EAAqB,CAGhC,GAAIL,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAO,eAAa,KAAK,EAK7B,GAAIA,EAAI,WAAa,YACjB,GAAI,CAEA,IAAMM,EAAeR,GAAU,MAAMS,EAAe,EAEpD,GAAID,EAAc,CAEd,IAAME,EAAiBC,EAAgBH,CAAY,EAC7CI,EAAW,IAAI,eAAaF,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIP,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtES,CACX,KAAO,CAEHV,EAAI,SAAW,YACf,IAAMU,EAAW,eAAa,QAAQV,CAAG,EACzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DX,EAAI,SAAW,YACf,IAAMU,EAAW,eAAa,QAAQV,CAAG,EACzC,OAAAU,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DT,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CAIJ,GAAIV,EAAI,SAAS,SAAS,GAAG,EACzB,OAAO,eAAa,KAAK,EAI7B,IAAIY,EAAcZ,EAAI,UAClBY,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIZ,IACAY,EAAW,YAAY,EAAI,GAAGZ,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAIlE,IAAMa,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOd,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMe,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBf,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAcc,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGlB,EAAI,MAAM,SAASY,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAA
cF,CAAS,GAE5D,OAAO,IAAI,eAAaG,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGArB,EAAI,SAAW,SAASY,CAAW,MAGnC,IAAMF,EAAW,eAAa,QAAQV,CAAG,EACzC,OAAIC,GACAS,EAAS,QAAQ,IAAI,aAAc,GAAGT,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtES,CACX,CAEA,OAAO,eAAa,KAAK,CAC7B","names":["middleware_exports","__export","AI_BOT_USER_AGENTS","matchBot","ontoMiddleware","__toCommonJS","import_server","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","loadOntoConfig","config","generateLlmsTxt","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","isAiBot","isMarkdownRequested","activeConfig","loadOntoConfig","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
|
package/dist/middleware.mjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import{NextResponse as c}from"next/server";var y=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],B=y.map(e=>e.name);function l(e){if(!e)return;let t=e.toLowerCase();return y.find(o=>t.includes(o.name.toLowerCase()))}async function
|
|
1
|
+
import{NextResponse as c}from"next/server";var y=[{name:"GPTBot",company:"OpenAI"},{name:"ChatGPT-User",company:"OpenAI"},{name:"OAI-SearchBot",company:"OpenAI"},{name:"Googlebot",company:"Google"},{name:"Google-CloudVertexBot",company:"Google"},{name:"Google-Extended",company:"Google"},{name:"GoogleOther",company:"Google"},{name:"ClaudeBot",company:"Anthropic"},{name:"Claude-User",company:"Anthropic"},{name:"anthropic-ai",company:"Anthropic"},{name:"PerplexityBot",company:"Perplexity"},{name:"Perplexity-User",company:"Perplexity"},{name:"Meta-ExternalAgent",company:"Meta"},{name:"Meta-ExternalFetcher",company:"Meta"},{name:"FacebookBot",company:"Meta"},{name:"CCBot",company:"Common Crawl"},{name:"Bytespider",company:"ByteDance"},{name:"Applebot-Extended",company:"Apple"},{name:"cohere-ai",company:"Cohere"},{name:"YouBot",company:"You.com"}],B=y.map(e=>e.name);function l(e){if(!e)return;let t=e.toLowerCase();return y.find(o=>t.includes(o.name.toLowerCase()))}async function f(){try{let e=await import(process.cwd()+"/onto.config");return e.default||e}catch{return null}}function x(e){let t=[];if(t.push(`# ${e.name}`),t.push(""),t.push(`> ${e.summary}`),t.push(""),e.routes&&e.routes.length>0){t.push("## Key Routes"),t.push("");for(let o of e.routes){let a=`${e.baseUrl}${o.path}`;t.push(`- [${o.path}](${a}): ${o.description}`)}t.push("")}if(e.externalLinks&&e.externalLinks.length>0){t.push("## Resources"),t.push("");for(let o of e.externalLinks)o.description?t.push(`- [${o.title}](${o.url}): ${o.description}`):t.push(`- [${o.title}](${o.url})`);t.push("")}if(e.sections&&e.sections.length>0)for(let o of e.sections)t.push(`## ${o.heading}`),t.push(""),t.push(o.content),t.push("");return t.join(`
|
|
2
2
|
`).trim()+`
|
|
3
|
-
`}async function G(e){let
|
|
3
|
+
`}async function G(e,t){let o=e.headers.get("user-agent"),a=e.nextUrl.clone(),n=l(o),O=e.headers.get("accept")||"",C=!!n,$=O.includes("text/markdown");if(C||$){if(a.pathname.startsWith("/_next"))return c.next();if(a.pathname==="/llms.txt")try{let i=t||await f();if(i){let r=x(i),p=new c(r,{headers:{"Content-Type":"text/plain; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"}});return n&&p.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),p}else{a.pathname="/llms.txt";let r=c.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),r.headers.set("Cache-Control","public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}}catch(i){console.error("[Onto] Failed to generate llms.txt:",i),a.pathname="/llms.txt";let r=c.rewrite(a);return r.headers.set("Content-Type","text/plain; charset=utf-8"),n&&r.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),r}if(a.pathname.includes("."))return c.next();let s=a.pathname;(s==="/"||s==="")&&(s="/index"),s.endsWith("/")&&s!=="/"&&(s=s.slice(0,-1));let u={"Content-Type":"text/markdown; charset=utf-8","Cache-Control":"public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400"};n&&(u["X-Onto-Bot"]=`${n.name} (${n.company})`);let m=process.env.ONTO_API_KEY,h=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(m){fetch(`${h}/api/track`,{method:"POST",headers:{"x-onto-key":m,"Content-Type":"application/json"},body:JSON.stringify({route:a.pathname,userAgent:o,bot:n?n.name:null,company:n?n.company:null})}).catch(()=>{});try{let i=await fetch(`${h}/api/sdk/inject?route=${a.pathname}`,{headers:{"x-onto-key":m},signal:AbortSignal.timeout(1500)});if(i.ok){let{injection:r}=await i.json();if(r){let p=`${a.origin}/.onto${s}.md`,g=await fetch(p);if(g.ok){let A=`${await g.text()}
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
${r}`;return new c(
|
|
7
|
+
${r}`;return new c(A,{headers:{...u,"X-Onto-Injected":"true"}})}}}}catch(i){console.error("[Onto] Injection failed",i)}}a.pathname=`/.onto${s}.md`;let d=c.rewrite(a);return n&&d.headers.set("X-Onto-Bot",`${n.name} (${n.company})`),d}return c.next()}export{B as AI_BOT_USER_AGENTS,l as matchBot,G as ontoMiddleware};
|
|
8
8
|
//# sourceMappingURL=middleware.mjs.map
|
package/dist/middleware.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nimport { loadOntoConfig, generateLlmsTxt } from './config';\r\n\r\nexport async function ontoMiddleware(request: NextRequest) {\r\n const userAgent = request.headers.get('user-agent');\r\n const accept = request.headers.get('accept') || '';\r\n\r\n const matched = matchBot(userAgent);\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown');\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n const url = request.nextUrl.clone();\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Dynamically generate llms.txt from onto.config.ts\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n const config = await loadOntoConfig();\r\n\r\n if (config) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', 
`${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking — includes structured bot info\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. 
Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 
'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * 
Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Load the onto.config.ts file from the user's project\r\n * This is used by the middleware to dynamically generate llms.txt\r\n */\r\nexport async function loadOntoConfig(): Promise<OntoConfig | null> {\r\n try {\r\n // Try to dynamically import the config file from the user's project root\r\n // This runs in the middleware context, so we look in the project root\r\n const config = await import(process.cwd() + '/onto.config');\r\n return 
config.default || config;\r\n } catch (error) {\r\n // Config file doesn't exist or failed to load\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCiBA,eAAsBI,GAA6C,CACjE,GAAI,CAGF,IAAMC,EAAS,MAAM,OAAO,QAAQ,IAAI,EAAI,gBAC5C,OAAOA,EAAO,SAAWA,CAC3B,MAAgB,CAEd,OAAO,IACT,CACF,CASO,SAASC,EAAgBD,EAA4B,CAC1D,IAAME,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKF,EAAO,IAAI,EAAE,EAC7BE,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKF,EAAO,OAAO,EAAE,EAChCE,EAAM,KAAK,EAAE,EAGTF,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CE,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASH,EAAO,OAAQ,CACjC,IAAMI,EAAU,GAAGJ,EAAO,OAAO,GAAGG,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DE,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQL,EAAO,cACpBK,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWM,KAAWN,EAAO,SAC3BE,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlJA,eAAsBK,EAAeC,EAAsB,CACvD,IAAMC,E
AAYD,EAAQ,QAAQ,IAAI,YAAY,EAC5CE,EAASF,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAE1CG,EAAUC,EAASH,CAAS,EAC5BI,EAAU,CAAC,CAACF,EACZG,EAAsBJ,EAAO,SAAS,eAAe,EAG3D,GAAIG,GAAWC,EAAqB,CAChC,IAAMC,EAAMP,EAAQ,QAAQ,MAAM,EAGlC,GAAIO,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOC,EAAa,KAAK,EAK7B,GAAID,EAAI,WAAa,YACjB,GAAI,CACA,IAAME,EAAS,MAAMC,EAAe,EAEpC,GAAID,EAAQ,CAER,IAAME,EAAiBC,EAAgBH,CAAM,EACvCI,EAAW,IAAIL,EAAaG,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIR,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtEU,CACX,KAAO,CAEHN,EAAI,SAAW,YACf,IAAMM,EAAWL,EAAa,QAAQD,CAAG,EACzC,OAAAM,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGV,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEU,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DP,EAAI,SAAW,YACf,IAAMM,EAAWL,EAAa,QAAQD,CAAG,EACzC,OAAAM,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DV,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEU,CACX,CAIJ,GAAIN,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOC,EAAa,KAAK,EAI7B,IAAIO,EAAcR,EAAI,UAClBQ,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIb,IACAa,EAAW,YAAY,EAAI,GAAGb,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAIlE,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOV,EAAI,SACX,UAAWN,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMgB,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBX,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAcU,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGd,EAAI,MAAM,SAASQ,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAIZ,EAAae,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAmB,MACvB,CA
CJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGAjB,EAAI,SAAW,SAASQ,CAAW,MAGnC,IAAMF,EAAWL,EAAa,QAAQD,CAAG,EACzC,OAAIJ,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEU,CACX,CAEA,OAAOL,EAAa,KAAK,CAC7B","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","loadOntoConfig","config","generateLlmsTxt","lines","route","fullUrl","link","section","ontoMiddleware","request","userAgent","accept","matched","matchBot","isAiBot","isMarkdownRequested","url","NextResponse","config","loadOntoConfig","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
|
|
1
|
+
{"version":3,"sources":["../src/middleware.ts","../src/bots.ts","../src/config.ts"],"sourcesContent":["import { NextRequest, NextResponse } from 'next/server';\r\nimport { matchBot } from './bots';\r\nimport { loadOntoConfig, generateLlmsTxt, OntoConfig } from './config';\r\n\r\nexport async function ontoMiddleware(request: NextRequest, config?: OntoConfig) {\r\n const userAgent = request.headers.get('user-agent');\r\n const url = request.nextUrl.clone();\r\n const matched = matchBot(userAgent);\r\n\r\n const accept = request.headers.get('accept') || '';\r\n\r\n const isAiBot = !!matched;\r\n const isMarkdownRequested = accept.includes('text/markdown');\r\n\r\n // If traffic is identified as an AI Bot or markdown is requested\r\n if (isAiBot || isMarkdownRequested) {\r\n\r\n // Ignore internal next.js requests & static assets (but not llms.txt)\r\n if (url.pathname.startsWith('/_next')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // --- llms.txt Auto-Discovery ---\r\n // Serve the llms.txt manifest to AI agents\r\n if (url.pathname === '/llms.txt') {\r\n try {\r\n // Use provided config or try to load dynamic one\r\n const activeConfig = config || await loadOntoConfig();\r\n\r\n if (activeConfig) {\r\n // Generate llms.txt dynamically from config\r\n const llmsTxtContent = generateLlmsTxt(activeConfig);\r\n const response = new NextResponse(llmsTxtContent, {\r\n headers: {\r\n 'Content-Type': 'text/plain; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n }\r\n });\r\n\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n\r\n return response;\r\n } else {\r\n // Fallback: try to serve static llms.txt from public folder\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, 
stale-while-revalidate=86400');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n } catch (error) {\r\n console.error('[Onto] Failed to generate llms.txt:', error);\r\n // Fallback to static file on error\r\n url.pathname = '/llms.txt';\r\n const response = NextResponse.rewrite(url);\r\n response.headers.set('Content-Type', 'text/plain; charset=utf-8');\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n }\r\n\r\n // Skip other static assets\r\n if (url.pathname.includes('.')) {\r\n return NextResponse.next();\r\n }\r\n\r\n // Determine the corresponding payload path\r\n let payloadPath = url.pathname;\r\n if (payloadPath === '/' || payloadPath === '') {\r\n payloadPath = '/index';\r\n }\r\n\r\n // Strip trailing slash if present\r\n if (payloadPath.endsWith('/') && payloadPath !== '/') {\r\n payloadPath = payloadPath.slice(0, -1);\r\n }\r\n\r\n // Common response headers for all bot responses\r\n const botHeaders: Record<string, string> = {\r\n 'Content-Type': 'text/markdown; charset=utf-8',\r\n 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',\r\n };\r\n if (matched) {\r\n botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;\r\n }\r\n\r\n // --- Onto Control Plane Integration (Premium) ---\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY) {\r\n // 1. Fire-and-forget tracking — includes structured bot info\r\n fetch(`${DASHBOARD_URL}/api/track`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({\r\n route: url.pathname,\r\n userAgent: userAgent,\r\n bot: matched ? matched.name : null,\r\n company: matched ? 
matched.company : null,\r\n })\r\n }).catch(() => {});\r\n\r\n // 2. Dynamic Context Injection\r\n try {\r\n const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {\r\n headers: { 'x-onto-key': ONTO_API_KEY },\r\n signal: AbortSignal.timeout(1500)\r\n });\r\n\r\n if (injectRes.ok) {\r\n const { injection } = await injectRes.json();\r\n \r\n if (injection) {\r\n const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;\r\n const mdRes = await fetch(localMdUrl);\r\n \r\n if (mdRes.ok) {\r\n const baseMarkdown = await mdRes.text();\r\n const finalMarkdown = `${baseMarkdown}\\n\\n---\\n\\n${injection}`;\r\n \r\n return new NextResponse(finalMarkdown, {\r\n headers: {\r\n ...botHeaders,\r\n 'X-Onto-Injected': 'true'\r\n }\r\n });\r\n }\r\n }\r\n }\r\n } catch (err) {\r\n console.error('[Onto] Injection failed', err);\r\n }\r\n }\r\n // ------------------------------------------------\r\n\r\n url.pathname = `/.onto${payloadPath}.md`;\r\n\r\n // Rewrite implicitly serves the target URL transparently to the client.\r\n const response = NextResponse.rewrite(url);\r\n if (matched) {\r\n response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);\r\n }\r\n return response;\r\n }\r\n\r\n return NextResponse.next();\r\n}\r\n\r\n// Re-export the bot registry for consumers who want to extend or inspect it\r\nexport { AI_BOT_USER_AGENTS, matchBot } from './bots';\r\nexport type { AiBot } from './bots';\r\n\r\n\r\n","/**\n * Comprehensive registry of AI bot user-agent strings.\n * The middleware uses this list to detect AI crawlers and serve optimized markdown.\n */\n\nexport interface AiBot {\n /** The user-agent substring to match against */\n name: string;\n /** The company operating this bot */\n company: string;\n}\n\n/**\n * Structured registry of all known AI bots, grouped by company.\n * Useful for analytics and the Control Plane dashboard.\n */\nexport const AI_BOTS: AiBot[] = [\n // OpenAI\n { name: 'GPTBot', company: 
'OpenAI' },\n { name: 'ChatGPT-User', company: 'OpenAI' },\n { name: 'OAI-SearchBot', company: 'OpenAI' },\n\n // Google\n { name: 'Googlebot', company: 'Google' },\n { name: 'Google-CloudVertexBot', company: 'Google' },\n { name: 'Google-Extended', company: 'Google' },\n { name: 'GoogleOther', company: 'Google' },\n\n // Anthropic\n { name: 'ClaudeBot', company: 'Anthropic' },\n { name: 'Claude-User', company: 'Anthropic' },\n { name: 'anthropic-ai', company: 'Anthropic' },\n\n // Perplexity\n { name: 'PerplexityBot', company: 'Perplexity' },\n { name: 'Perplexity-User', company: 'Perplexity' },\n\n // Meta\n { name: 'Meta-ExternalAgent', company: 'Meta' },\n { name: 'Meta-ExternalFetcher', company: 'Meta' },\n { name: 'FacebookBot', company: 'Meta' },\n\n // Common Crawl (used by most smaller AI companies)\n { name: 'CCBot', company: 'Common Crawl' },\n\n // Other notable AI crawlers\n { name: 'Bytespider', company: 'ByteDance' },\n { name: 'Applebot-Extended', company: 'Apple' },\n { name: 'cohere-ai', company: 'Cohere' },\n { name: 'YouBot', company: 'You.com' },\n];\n\n/**\n * Flat list of user-agent substrings for fast matching in the middleware.\n */\nexport const AI_BOT_USER_AGENTS: string[] = AI_BOTS.map(bot => bot.name);\n\n/**\n * Given a raw user-agent string, returns the matched AiBot entry or undefined.\n * Comparison is case-insensitive to handle inconsistent agent casing.\n */\nexport function matchBot(userAgent: string | null): AiBot | undefined {\n if (!userAgent) return undefined;\n const lowerUA = userAgent.toLowerCase();\n return AI_BOTS.find(bot => \n lowerUA.includes(bot.name.toLowerCase())\n );\n}\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of 
what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Load the onto.config.ts file from the user's project\r\n * This is used by the middleware to dynamically generate llms.txt\r\n */\r\nexport async function loadOntoConfig(): Promise<OntoConfig | null> {\r\n try {\r\n // Try to dynamically import the config file from the user's project root\r\n // This runs in the middleware context, so we look in the project root\r\n const 
config = await import(process.cwd() + '/onto.config');\r\n return config.default || config;\r\n } catch (error) {\r\n // Config file doesn't exist or failed to load\r\n return null;\r\n }\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + 
'\\n';\r\n}\r\n"],"mappings":"AAAA,OAAsB,gBAAAA,MAAoB,cCgBnC,IAAMC,EAAmB,CAE5B,CAAE,KAAM,SAAqB,QAAS,QAAS,EAC/C,CAAE,KAAM,eAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,gBAAoB,QAAS,QAAS,EAG9C,CAAE,KAAM,YAA0B,QAAS,QAAS,EACpD,CAAE,KAAM,wBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,kBAA2B,QAAS,QAAS,EACrD,CAAE,KAAM,cAA2B,QAAS,QAAS,EAGrD,CAAE,KAAM,YAAmB,QAAS,WAAY,EAChD,CAAE,KAAM,cAAkB,QAAS,WAAY,EAC/C,CAAE,KAAM,eAAkB,QAAS,WAAY,EAG/C,CAAE,KAAM,gBAAmB,QAAS,YAAa,EACjD,CAAE,KAAM,kBAAmB,QAAS,YAAa,EAGjD,CAAE,KAAM,qBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,uBAAwB,QAAS,MAAO,EAChD,CAAE,KAAM,cAAuB,QAAS,MAAO,EAG/C,CAAE,KAAM,QAAS,QAAS,cAAe,EAGzC,CAAE,KAAM,aAAqB,QAAS,WAAY,EAClD,CAAE,KAAM,oBAAqB,QAAS,OAAQ,EAC9C,CAAE,KAAM,YAAoB,QAAS,QAAS,EAC9C,CAAE,KAAM,SAAoB,QAAS,SAAU,CACnD,EAKaC,EAA+BD,EAAQ,IAAIE,GAAOA,EAAI,IAAI,EAMhE,SAASC,EAASC,EAA6C,CAClE,GAAI,CAACA,EAAW,OAChB,IAAMC,EAAUD,EAAU,YAAY,EACtC,OAAOJ,EAAQ,KAAKE,GAChBG,EAAQ,SAASH,EAAI,KAAK,YAAY,CAAC,CAC3C,CACJ,CCiBA,eAAsBI,GAA6C,CACjE,GAAI,CAGF,IAAMC,EAAS,MAAM,OAAO,QAAQ,IAAI,EAAI,gBAC5C,OAAOA,EAAO,SAAWA,CAC3B,MAAgB,CAEd,OAAO,IACT,CACF,CASO,SAASC,EAAgBD,EAA4B,CAC1D,IAAME,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKF,EAAO,IAAI,EAAE,EAC7BE,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKF,EAAO,OAAO,EAAE,EAChCE,EAAM,KAAK,EAAE,EAGTF,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CE,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASH,EAAO,OAAQ,CACjC,IAAMI,EAAU,GAAGJ,EAAO,OAAO,GAAGG,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DE,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQL,EAAO,cACpBK,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAIF,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWM,KAAWN,EAAO,SAC3BE,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CFlJA,eAAsBK,EAAeC,EAAsBC,EAAqB,CAC5E,
IAAMC,EAAYF,EAAQ,QAAQ,IAAI,YAAY,EAC5CG,EAAMH,EAAQ,QAAQ,MAAM,EAC5BI,EAAUC,EAASH,CAAS,EAE5BI,EAASN,EAAQ,QAAQ,IAAI,QAAQ,GAAK,GAE1CO,EAAU,CAAC,CAACH,EACZI,EAAsBF,EAAO,SAAS,eAAe,EAG3D,GAAIC,GAAWC,EAAqB,CAGhC,GAAIL,EAAI,SAAS,WAAW,QAAQ,EAChC,OAAOM,EAAa,KAAK,EAK7B,GAAIN,EAAI,WAAa,YACjB,GAAI,CAEA,IAAMO,EAAeT,GAAU,MAAMU,EAAe,EAEpD,GAAID,EAAc,CAEd,IAAME,EAAiBC,EAAgBH,CAAY,EAC7CI,EAAW,IAAIL,EAAaG,EAAgB,CAC9C,QAAS,CACL,eAAgB,4BAChB,gBAAiB,mEACrB,CACJ,CAAC,EAED,OAAIR,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAGtEU,CACX,KAAO,CAEHX,EAAI,SAAW,YACf,IAAMW,EAAWL,EAAa,QAAQN,CAAG,EACzC,OAAAW,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAChEA,EAAS,QAAQ,IAAI,gBAAiB,mEAAmE,EACrGV,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEU,CACX,CACJ,OAASC,EAAO,CACZ,QAAQ,MAAM,sCAAuCA,CAAK,EAE1DZ,EAAI,SAAW,YACf,IAAMW,EAAWL,EAAa,QAAQN,CAAG,EACzC,OAAAW,EAAS,QAAQ,IAAI,eAAgB,2BAA2B,EAC5DV,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEU,CACX,CAIJ,GAAIX,EAAI,SAAS,SAAS,GAAG,EACzB,OAAOM,EAAa,KAAK,EAI7B,IAAIO,EAAcb,EAAI,UAClBa,IAAgB,KAAOA,IAAgB,MACvCA,EAAc,UAIdA,EAAY,SAAS,GAAG,GAAKA,IAAgB,MAC7CA,EAAcA,EAAY,MAAM,EAAG,EAAE,GAIzC,IAAMC,EAAqC,CACvC,eAAgB,+BAChB,gBAAiB,mEACrB,EACIb,IACAa,EAAW,YAAY,EAAI,GAAGb,EAAQ,IAAI,KAAKA,EAAQ,OAAO,KAIlE,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,EAAc,CAEd,MAAM,GAAGC,CAAa,aAAc,CAChC,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CACjB,MAAOf,EAAI,SACX,UAAWD,EACX,IAAKE,EAAUA,EAAQ,KAAO,KAC9B,QAASA,EAAUA,EAAQ,QAAU,IACzC,CAAC,CACL,CAAC,EAAE,MAAM,IAAM,CAAC,CAAC,EAGjB,GAAI,CACA,IAAMgB,EAAY,MAAM,MAAM,GAAGD,CAAa,yBAAyBhB,EAAI,QAAQ,GAAI,CACnF,QAAS,CAAE,aAAce,CAAa,EACtC,OAAQ,YAAY,QAAQ,IAAI,CACpC,CAAC,EAED,GAAIE,EAAU,GAAI,CACd,GAAM,CAAE,UAAAC,CAAU,EAAI,MAAMD,EAAU,KAAK,EAE3C,GAAIC,EAAW,CACX,IAAMC,EAAa,GAAGnB,EAAI,MAAM,SAASa,CAAW,MAC9CO,EAAQ,MAAM,MAAMD,CAAU,EAEpC,GAAIC,EAAM,GAAI,CAEV,IAAMC,EAAgB,GADD,MAAMD,EAAM,KAAK,CACD;AAAA;AAAA;AAAA;AAAA,EAAcF,CAAS,GAE5D,OAAO,IAAIZ,EAAae,EAAe,CACnC,QAAS,CACL,GAAGP,EACH,kBAAm
B,MACvB,CACJ,CAAC,CACL,CACJ,CACJ,CACJ,OAASQ,EAAK,CACV,QAAQ,MAAM,0BAA2BA,CAAG,CAChD,CACJ,CAGAtB,EAAI,SAAW,SAASa,CAAW,MAGnC,IAAMF,EAAWL,EAAa,QAAQN,CAAG,EACzC,OAAIC,GACAU,EAAS,QAAQ,IAAI,aAAc,GAAGV,EAAQ,IAAI,KAAKA,EAAQ,OAAO,GAAG,EAEtEU,CACX,CAEA,OAAOL,EAAa,KAAK,CAC7B","names":["NextResponse","AI_BOTS","AI_BOT_USER_AGENTS","bot","matchBot","userAgent","lowerUA","loadOntoConfig","config","generateLlmsTxt","lines","route","fullUrl","link","section","ontoMiddleware","request","config","userAgent","url","matched","matchBot","accept","isAiBot","isMarkdownRequested","NextResponse","activeConfig","loadOntoConfig","llmsTxtContent","generateLlmsTxt","response","error","payloadPath","botHeaders","ONTO_API_KEY","DASHBOARD_URL","injectRes","injection","localMdUrl","mdRes","finalMarkdown","err"]}
|
package/package.json
CHANGED
package/src/middleware.ts
CHANGED
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
import { NextRequest, NextResponse } from 'next/server';
|
|
2
|
-
import {
|
|
3
|
-
import { loadOntoConfig, generateLlmsTxt } from './config';
|
|
2
|
+
import { matchBot } from './bots';
|
|
3
|
+
import { loadOntoConfig, generateLlmsTxt, OntoConfig } from './config';
|
|
4
4
|
|
|
5
|
-
export async function ontoMiddleware(request: NextRequest) {
|
|
5
|
+
export async function ontoMiddleware(request: NextRequest, config?: OntoConfig) {
|
|
6
6
|
const userAgent = request.headers.get('user-agent');
|
|
7
|
+
const url = request.nextUrl.clone();
|
|
8
|
+
const matched = matchBot(userAgent);
|
|
9
|
+
|
|
7
10
|
const accept = request.headers.get('accept') || '';
|
|
8
11
|
|
|
9
|
-
const matched = matchBot(userAgent);
|
|
10
12
|
const isAiBot = !!matched;
|
|
11
13
|
const isMarkdownRequested = accept.includes('text/markdown');
|
|
12
14
|
|
|
13
15
|
// If traffic is identified as an AI Bot or markdown is requested
|
|
14
16
|
if (isAiBot || isMarkdownRequested) {
|
|
15
|
-
const url = request.nextUrl.clone();
|
|
16
17
|
|
|
17
18
|
// Ignore internal next.js requests & static assets (but not llms.txt)
|
|
18
19
|
if (url.pathname.startsWith('/_next')) {
|
|
@@ -20,14 +21,15 @@ export async function ontoMiddleware(request: NextRequest) {
|
|
|
20
21
|
}
|
|
21
22
|
|
|
22
23
|
// --- llms.txt Auto-Discovery ---
|
|
23
|
-
//
|
|
24
|
+
// Serve the llms.txt manifest to AI agents
|
|
24
25
|
if (url.pathname === '/llms.txt') {
|
|
25
26
|
try {
|
|
26
|
-
|
|
27
|
+
// Use provided config or try to load dynamic one
|
|
28
|
+
const activeConfig = config || await loadOntoConfig();
|
|
27
29
|
|
|
28
|
-
if (
|
|
30
|
+
if (activeConfig) {
|
|
29
31
|
// Generate llms.txt dynamically from config
|
|
30
|
-
const llmsTxtContent = generateLlmsTxt(
|
|
32
|
+
const llmsTxtContent = generateLlmsTxt(activeConfig);
|
|
31
33
|
const response = new NextResponse(llmsTxtContent, {
|
|
32
34
|
headers: {
|
|
33
35
|
'Content-Type': 'text/plain; charset=utf-8',
|