@ontosdk/next 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/JSON_LD_GUIDE.md +403 -0
  2. package/LLMS_TXT_GUIDE.md +297 -0
  3. package/ONTOROVIDER_USAGE.md +130 -0
  4. package/QUICKSTART.md +71 -0
  5. package/README.md +253 -0
  6. package/SCHEMA_QUICKSTART.md +97 -0
  7. package/dist/OntoHead.d.mts +27 -0
  8. package/dist/OntoHead.d.ts +27 -0
  9. package/dist/OntoHead.js +2 -0
  10. package/dist/OntoHead.js.map +1 -0
  11. package/dist/OntoHead.mjs +2 -0
  12. package/dist/OntoHead.mjs.map +1 -0
  13. package/dist/OntoProvider.d.mts +52 -0
  14. package/dist/OntoProvider.d.ts +52 -0
  15. package/dist/OntoProvider.js +2 -0
  16. package/dist/OntoProvider.js.map +1 -0
  17. package/dist/OntoProvider.mjs +2 -0
  18. package/dist/OntoProvider.mjs.map +1 -0
  19. package/dist/cli.js +6 -6
  20. package/dist/cli.js.map +1 -1
  21. package/dist/cli.mjs +6 -6
  22. package/dist/cli.mjs.map +1 -1
  23. package/dist/config.d.mts +85 -0
  24. package/dist/config.d.ts +85 -0
  25. package/dist/config.js +4 -0
  26. package/dist/config.js.map +1 -0
  27. package/dist/config.mjs +4 -0
  28. package/dist/config.mjs.map +1 -0
  29. package/dist/index.d.mts +3 -0
  30. package/dist/index.d.ts +3 -0
  31. package/dist/index.js +6 -4
  32. package/dist/index.js.map +1 -1
  33. package/dist/index.mjs +6 -4
  34. package/dist/index.mjs.map +1 -1
  35. package/dist/middleware.d.mts +21 -1
  36. package/dist/middleware.d.ts +21 -1
  37. package/dist/middleware.js +4 -2
  38. package/dist/middleware.js.map +1 -1
  39. package/dist/middleware.mjs +4 -2
  40. package/dist/middleware.mjs.map +1 -1
  41. package/dist/schemas.d.mts +73 -0
  42. package/dist/schemas.d.ts +73 -0
  43. package/dist/schemas.js +2 -0
  44. package/dist/schemas.js.map +1 -0
  45. package/dist/schemas.mjs +2 -0
  46. package/dist/schemas.mjs.map +1 -0
  47. package/onto.config.example.ts +111 -0
  48. package/package.json +28 -2
  49. package/src/OntoHead.tsx +59 -0
  50. package/src/OntoProvider.tsx +95 -0
  51. package/src/bots.ts +68 -0
  52. package/src/cli.ts +18 -1
  53. package/src/config.ts +151 -0
  54. package/src/index.ts +14 -0
  55. package/src/middleware.ts +82 -24
  56. package/src/schemas.ts +186 -0
  57. package/tsconfig.json +1 -0
  58. package/tsup.config.ts +2 -2
package/src/config.ts ADDED
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Configuration schema for onto.config.ts
3
+ * Used to dynamically generate llms.txt and other AI discovery files
4
+ */
5
+
6
/**
 * Page categories that select which JSON-LD schema is injected for a route.
 * - 'scoring': Methodology schema with AIO scoring weights (40/35/25)
 * - 'about': Organization/AboutPage schema
 * - 'default': no automatic schema injection
 */
export type PageType = 'scoring' | 'about' | 'default';

/**
 * A single route advertised to AI agents.
 */
export interface OntoRoute {
  /** URL path of the route, e.g. '/docs' or '/api/reference'. */
  path: string;

  /** Description of what this route contains. */
  description: string;

  /**
   * Optional page category used for automatic JSON-LD schema injection.
   * See {@link PageType} for the meaning of each value.
   */
  pageType?: PageType;
}
25
+
26
/**
 * Shape of the default export of a project's onto.config file.
 * Consumed by `generateLlmsTxt` and by the JSON-LD schema generators.
 */
export interface OntoConfig {
  /**
   * Project or site name (required).
   * Rendered as the H1 heading of llms.txt.
   */
  name: string;

  /**
   * Short project summary (required).
   * Rendered as a blockquote in llms.txt; it should carry the key
   * information needed to understand the rest of the file.
   */
  summary: string;

  /**
   * Base URL of the site (e.g., 'https://example.com').
   * Prepended to each route path when llms.txt links are built.
   */
  baseUrl: string;

  /**
   * Optional extra llms.txt sections.
   * Each entry is rendered as an H2 heading followed by its markdown content.
   */
  sections?: Array<{
    heading: string;
    content: string;
  }>;

  /**
   * Key routes AI agents should know about.
   * Rendered as a markdown list in llms.txt.
   */
  routes?: OntoRoute[];

  /**
   * Optional links to external resources (documentation, API references, etc.).
   */
  externalLinks?: Array<{
    title: string;
    url: string;
    description?: string;
  }>;

  /**
   * Optional organization details used by the JSON-LD schema generators.
   */
  organization?: {
    name: string;
    description?: string;
    url?: string;
    logo?: string;
    foundingDate?: string;
  };
}
80
+
81
/**
 * Report whether an import failure carries one of the module-not-found
 * error codes (`ERR_MODULE_NOT_FOUND` for ESM, `MODULE_NOT_FOUND` for CJS).
 */
function isMissingModuleError(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  const code = (error as { code?: unknown }).code;
  return code === 'ERR_MODULE_NOT_FOUND' || code === 'MODULE_NOT_FOUND';
}

/**
 * Load the onto.config module from the user's project.
 *
 * The module is resolved relative to `process.cwd()`. The module's default
 * export is returned when present, otherwise the module namespace itself.
 *
 * @returns The loaded config, or `null` when the config cannot be loaded.
 *   This function never rejects: a module-not-found failure is treated as
 *   "no config" and stays silent, while any other failure (for example a
 *   config file that throws while being evaluated) is logged as a warning
 *   so that a broken config is not mistaken for a missing one.
 */
export async function loadOntoConfig(): Promise<OntoConfig | null> {
  try {
    // Dynamic import so the lookup happens at call time against the project root.
    const loaded = await import(process.cwd() + '/onto.config');
    return loaded.default || loaded;
  } catch (error: unknown) {
    if (!isMissingModuleError(error)) {
      console.warn('[Onto] Failed to load onto.config:', error);
    }
    return null;
  }
}
96
+
97
/**
 * Generate llms.txt content from an OntoConfig.
 *
 * Output layout:
 * - H1 with the project name
 * - Blockquote with the summary (every summary line is quoted, so a
 *   multi-line summary stays inside the blockquote)
 * - "## Key Routes" list, when `routes` is non-empty
 * - "## Resources" list, when `externalLinks` is non-empty
 * - One "## <heading>" section per entry in `sections`
 *
 * Route links are built by joining `baseUrl` and the route path with exactly
 * one slash, regardless of a trailing slash on `baseUrl` or a missing leading
 * slash on the path. The link label remains the path as configured.
 *
 * @param config - Project configuration to render.
 * @returns The llms.txt document, trimmed and terminated by a single newline.
 */
export function generateLlmsTxt(config: OntoConfig): string {
  const lines: string[] = [];

  // H1: Project name (required)
  lines.push(`# ${config.name}`, '');

  // Blockquote: Summary (required). Quote each line individually; an empty
  // line becomes a bare '>' so the blockquote is not terminated early.
  for (const summaryLine of config.summary.split(/\r?\n/)) {
    lines.push(summaryLine ? `> ${summaryLine}` : '>');
  }
  lines.push('');

  // Key Routes section (if provided)
  if (config.routes && config.routes.length > 0) {
    // Strip trailing slashes once so the join below never produces '//'.
    const base = config.baseUrl.replace(/\/+$/, '');
    lines.push('## Key Routes', '');
    for (const route of config.routes) {
      const path = route.path.startsWith('/') ? route.path : `/${route.path}`;
      lines.push(`- [${route.path}](${base}${path}): ${route.description}`);
    }
    lines.push('');
  }

  // External Links section (if provided)
  if (config.externalLinks && config.externalLinks.length > 0) {
    lines.push('## Resources', '');
    for (const link of config.externalLinks) {
      const entry = `- [${link.title}](${link.url})`;
      lines.push(link.description ? `${entry}: ${link.description}` : entry);
    }
    lines.push('');
  }

  // Custom sections (if provided)
  for (const section of config.sections ?? []) {
    lines.push(`## ${section.heading}`, '', section.content, '');
  }

  return lines.join('\n').trim() + '\n';
}
package/src/index.ts CHANGED
@@ -1,3 +1,17 @@
1
1
  // We cannot use Webpack plugins reliably in Next.js Turbopack due to WorkerError restrictions.
2
2
  // Users must instead run `npx onto-next` as a postbuild script.
3
3
  export { extractContent } from './extractor';
4
// Runtime helpers from the config module.
export { loadOntoConfig, generateLlmsTxt } from './config';
// Type-only re-exports. OntoConfig and OntoRoute are interfaces, so they must go
// through `export type` to stay valid under `isolatedModules` and single-file
// transpilers. The `*Type` aliases are kept for backward compatibility.
export type {
  OntoConfig,
  OntoRoute,
  PageType,
  OntoConfig as OntoConfigType,
  OntoRoute as OntoRouteType,
} from './config';
6
+ export {
7
+ generateAIOMethodologySchema,
8
+ generateOrganizationSchema,
9
+ generateAboutPageSchema,
10
+ generateSchemaForPageType,
11
+ serializeSchema
12
+ } from './schemas';
13
+ export type {
14
+ AIOMethodologySchema,
15
+ OrganizationSchema,
16
+ AboutPageSchema
17
+ } from './schemas';
package/src/middleware.ts CHANGED
@@ -1,29 +1,71 @@
1
1
  import { NextRequest, NextResponse } from 'next/server';
2
-
3
- const AI_BOT_USER_AGENTS = [
4
- 'GPTBot',
5
- 'ChatGPT-User',
6
- 'ClaudeBot',
7
- 'Claude-Web',
8
- 'anthropic-ai',
9
- 'PerplexityBot',
10
- 'OAI-SearchBot',
11
- 'GoogleExtended',
12
- ];
2
+ import { AI_BOT_USER_AGENTS, matchBot } from './bots';
3
+ import { loadOntoConfig, generateLlmsTxt } from './config';
13
4
 
14
5
  export async function ontoMiddleware(request: NextRequest) {
15
- const userAgent = request.headers.get('user-agent') || '';
6
+ const userAgent = request.headers.get('user-agent');
16
7
  const accept = request.headers.get('accept') || '';
17
8
 
18
- const isAiBot = AI_BOT_USER_AGENTS.some(bot => userAgent.includes(bot));
9
+ const matched = matchBot(userAgent);
10
+ const isAiBot = !!matched;
19
11
  const isMarkdownRequested = accept.includes('text/markdown');
20
12
 
21
- // If traffic is identified as an AI Bot, rewrite the URL
13
+ // If traffic is identified as an AI Bot or markdown is requested
22
14
  if (isAiBot || isMarkdownRequested) {
23
15
  const url = request.nextUrl.clone();
24
16
 
25
- // Ignore internal next.js requests & static assets
26
- if (url.pathname.startsWith('/_next') || url.pathname.includes('.')) {
17
+ // Ignore internal next.js requests & static assets (but not llms.txt)
18
+ if (url.pathname.startsWith('/_next')) {
19
+ return NextResponse.next();
20
+ }
21
+
22
+ // --- llms.txt Auto-Discovery ---
23
+ // Dynamically generate llms.txt from onto.config.ts
24
+ if (url.pathname === '/llms.txt') {
25
+ try {
26
+ const config = await loadOntoConfig();
27
+
28
+ if (config) {
29
+ // Generate llms.txt dynamically from config
30
+ const llmsTxtContent = generateLlmsTxt(config);
31
+ const response = new NextResponse(llmsTxtContent, {
32
+ headers: {
33
+ 'Content-Type': 'text/plain; charset=utf-8',
34
+ 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
35
+ }
36
+ });
37
+
38
+ if (matched) {
39
+ response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
40
+ }
41
+
42
+ return response;
43
+ } else {
44
+ // Fallback: try to serve static llms.txt from public folder
45
+ url.pathname = '/llms.txt';
46
+ const response = NextResponse.rewrite(url);
47
+ response.headers.set('Content-Type', 'text/plain; charset=utf-8');
48
+ response.headers.set('Cache-Control', 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400');
49
+ if (matched) {
50
+ response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
51
+ }
52
+ return response;
53
+ }
54
+ } catch (error) {
55
+ console.error('[Onto] Failed to generate llms.txt:', error);
56
+ // Fallback to static file on error
57
+ url.pathname = '/llms.txt';
58
+ const response = NextResponse.rewrite(url);
59
+ response.headers.set('Content-Type', 'text/plain; charset=utf-8');
60
+ if (matched) {
61
+ response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
62
+ }
63
+ return response;
64
+ }
65
+ }
66
+
67
+ // Skip other static assets
68
+ if (url.pathname.includes('.')) {
27
69
  return NextResponse.next();
28
70
  }
29
71
 
@@ -38,13 +80,21 @@ export async function ontoMiddleware(request: NextRequest) {
38
80
  payloadPath = payloadPath.slice(0, -1);
39
81
  }
40
82
 
83
+ // Common response headers for all bot responses
84
+ const botHeaders: Record<string, string> = {
85
+ 'Content-Type': 'text/markdown; charset=utf-8',
86
+ 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
87
+ };
88
+ if (matched) {
89
+ botHeaders['X-Onto-Bot'] = `${matched.name} (${matched.company})`;
90
+ }
91
+
41
92
  // --- Onto Control Plane Integration (Premium) ---
42
93
  const ONTO_API_KEY = process.env.ONTO_API_KEY;
43
94
  const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';
44
95
 
45
96
  if (ONTO_API_KEY) {
46
- // 1. Fire-and-forget tracking
47
- // Use background fetch (no await) to avoid blocking the response
97
+ // 1. Fire-and-forget tracking — includes structured bot info
48
98
  fetch(`${DASHBOARD_URL}/api/track`, {
49
99
  method: 'POST',
50
100
  headers: {
@@ -54,15 +104,15 @@ export async function ontoMiddleware(request: NextRequest) {
54
104
  body: JSON.stringify({
55
105
  route: url.pathname,
56
106
  userAgent: userAgent,
107
+ bot: matched ? matched.name : null,
108
+ company: matched ? matched.company : null,
57
109
  })
58
110
  }).catch(() => {});
59
111
 
60
112
  // 2. Dynamic Context Injection
61
113
  try {
62
- // Fetch the injection from the Control Plane
63
114
  const injectRes = await fetch(`${DASHBOARD_URL}/api/sdk/inject?route=${url.pathname}`, {
64
115
  headers: { 'x-onto-key': ONTO_API_KEY },
65
- // Set a strict timeout to keep edge fast
66
116
  signal: AbortSignal.timeout(1500)
67
117
  });
68
118
 
@@ -70,7 +120,6 @@ export async function ontoMiddleware(request: NextRequest) {
70
120
  const { injection } = await injectRes.json();
71
121
 
72
122
  if (injection) {
73
- // To inject, we must fetch the local markdown and append
74
123
  const localMdUrl = `${url.origin}/.onto${payloadPath}.md`;
75
124
  const mdRes = await fetch(localMdUrl);
76
125
 
@@ -80,8 +129,7 @@ export async function ontoMiddleware(request: NextRequest) {
80
129
 
81
130
  return new NextResponse(finalMarkdown, {
82
131
  headers: {
83
- 'Content-Type': 'text/markdown; charset=utf-8',
84
- 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=86400',
132
+ ...botHeaders,
85
133
  'X-Onto-Injected': 'true'
86
134
  }
87
135
  });
@@ -97,8 +145,18 @@ export async function ontoMiddleware(request: NextRequest) {
97
145
  url.pathname = `/.onto${payloadPath}.md`;
98
146
 
99
147
  // Rewrite implicitly serves the target URL transparently to the client.
100
- return NextResponse.rewrite(url);
148
+ const response = NextResponse.rewrite(url);
149
+ if (matched) {
150
+ response.headers.set('X-Onto-Bot', `${matched.name} (${matched.company})`);
151
+ }
152
+ return response;
101
153
  }
102
154
 
103
155
  return NextResponse.next();
104
156
  }
157
+
158
+ // Re-export the bot registry for consumers who want to extend or inspect it
159
+ export { AI_BOT_USER_AGENTS, matchBot } from './bots';
160
+ export type { AiBot } from './bots';
161
+
162
+
package/src/schemas.ts ADDED
@@ -0,0 +1,186 @@
1
+ /**
2
+ * JSON-LD Schema generators for automatic structured data injection
3
+ * Follows Schema.org standards for AI-friendly metadata
4
+ */
5
+
6
+ import { OntoConfig } from './config';
7
+
8
/**
 * JSON-LD `HowTo` document describing the standard AIO (AI Optimization)
 * scoring methodology.
 *
 * Based on the Onto scoring algorithm:
 * - Content Negotiation: 40% (30 points)
 * - React Tax / Token Efficiency: 35% (30 points)
 * - Structured Data: 25% (25 points)
 * - Semantic HTML: Bonus (15 points)
 */
export interface AIOMethodologySchema {
  '@context': 'https://schema.org';
  '@type': 'HowTo';
  /** Title of the methodology. */
  name: string;
  /** Summary of what the score measures. */
  description: string;
  /** Ordered scoring steps; `position` gives each step's place in the list. */
  step: {
    '@type': 'HowToStep';
    name: string;
    text: string;
    position: number;
  }[];
}
28
+
29
/**
 * Build the AIO Scoring Methodology JSON-LD schema, which explains to AI
 * agents how the scoring system works.
 *
 * The returned document is static: neither argument is read.
 *
 * @param config - Project configuration (currently unused).
 * @param pageUrl - URL of the page the schema is generated for (currently unused).
 * @returns A `HowTo` schema with four `HowToStep` entries numbered from 1.
 */
export function generateAIOMethodologySchema(
  config: OntoConfig,
  pageUrl: string
): AIOMethodologySchema {
  // [name, text] pairs in display order; positions are derived from the index.
  const stepDefinitions: ReadonlyArray<readonly [string, string]> = [
    [
      'Content Negotiation',
      'Check if the site responds to Accept: text/markdown header. Weight: 40%. Penalty: -30 points if missing. This ensures AI bots receive optimized content instead of heavy HTML.',
    ],
    [
      'Token Efficiency (React Tax)',
      'Measure the ratio of visible text to total HTML size. Weight: 35%. Penalty: -30 points if HTML > 100KB but text < 1KB. Detects JavaScript-heavy sites that are difficult for AI to parse.',
    ],
    [
      'Structured Data',
      'Verify presence of JSON-LD structured data (Schema.org). Weight: 25%. Penalty: -25 points if missing. Enables AI to confidently extract pricing, products, and entities.',
    ],
    [
      'Semantic HTML',
      'Check for semantic tags like <main> and <article>. Bonus: +15 points if present. Helps AI agents separate navigation from core content.',
    ],
  ];

  const step = stepDefinitions.map(([name, text], index) => ({
    '@type': 'HowToStep' as const,
    name,
    text,
    position: index + 1,
  }));

  return {
    '@context': 'https://schema.org',
    '@type': 'HowTo',
    name: 'AIO Score Calculation Methodology',
    description: 'AI Optimization (AIO) Score measures how well a website is optimized for AI agents and LLM crawlers. Scored out of 100 points based on four key metrics.',
    step,
  };
}
70
+
71
/**
 * JSON-LD `Organization` document used on About pages.
 * Only `name` is mandatory; the remaining fields are emitted when configured.
 */
export interface OrganizationSchema {
  '@context': 'https://schema.org';
  '@type': 'Organization';
  /** Organization name. */
  name: string;
  /** Organization URL. */
  url?: string;
  /** Description of the organization. */
  description?: string;
  /** Organization logo. */
  logo?: string;
  /** Founding date, passed through as the configured string. */
  foundingDate?: string;
}
83
+
84
/**
 * Build the Organization JSON-LD schema from `config.organization`.
 *
 * @param config - Project configuration; only `organization` is read.
 * @param pageUrl - URL of the page the schema is generated for (currently unused).
 * @returns The schema, or `null` when no organization is configured. Optional
 *   fields are included only when their configured value is truthy.
 */
export function generateOrganizationSchema(
  config: OntoConfig,
  pageUrl: string
): OrganizationSchema | null {
  const org = config.organization;
  if (!org) {
    return null;
  }

  const { name, url, description, logo, foundingDate } = org;

  // Conditional spreads keep the key order: url, description, logo, foundingDate.
  return {
    '@context': 'https://schema.org',
    '@type': 'Organization',
    name,
    ...(url ? { url } : {}),
    ...(description ? { description } : {}),
    ...(logo ? { logo } : {}),
    ...(foundingDate ? { foundingDate } : {}),
  };
}
119
+
120
/**
 * JSON-LD `AboutPage` document: a web page description that may embed the
 * site's Organization as its main entity.
 */
export interface AboutPageSchema {
  '@context': 'https://schema.org';
  '@type': 'AboutPage';
  /** Page title. */
  name: string;
  /** URL of the About page. */
  url: string;
  /** Page description. */
  description?: string;
  /** Organization the page is about, when one is configured. */
  mainEntity?: OrganizationSchema;
}
131
+
132
/**
 * Build the AboutPage JSON-LD schema.
 *
 * @param config - Project configuration; `name`, `summary` and `organization` are read.
 * @param pageUrl - URL of the About page, emitted as the schema's `url`.
 * @returns An `AboutPage` schema named "About <config.name>". `description` is
 *   included when `config.summary` is truthy, and `mainEntity` when an
 *   Organization schema could be generated.
 */
export function generateAboutPageSchema(
  config: OntoConfig,
  pageUrl: string
): AboutPageSchema {
  const organization = generateOrganizationSchema(config, pageUrl);
  const { name, summary } = config;

  // Conditional spreads keep the key order: description before mainEntity.
  return {
    '@context': 'https://schema.org',
    '@type': 'AboutPage',
    name: `About ${name}`,
    url: pageUrl,
    ...(summary ? { description: summary } : {}),
    ...(organization ? { mainEntity: organization } : {}),
  };
}
158
+
159
/**
 * Pick and generate the JSON-LD schema that matches a page type.
 *
 * @param pageType - 'scoring' yields the AIO methodology schema, 'about' the
 *   AboutPage schema; 'default' (or any other value) yields nothing.
 * @param config - Project configuration forwarded to the generator.
 * @param pageUrl - Page URL forwarded to the generator.
 * @returns The generated schema, or `null` when no schema applies.
 */
export function generateSchemaForPageType(
  pageType: 'scoring' | 'about' | 'default',
  config: OntoConfig,
  pageUrl: string
): any | null {
  if (pageType === 'scoring') {
    return generateAIOMethodologySchema(config, pageUrl);
  }
  if (pageType === 'about') {
    return generateAboutPageSchema(config, pageUrl);
  }
  // 'default' and unrecognized values: no automatic schema.
  return null;
}
177
+
178
/**
 * Serialize a schema to JSON-LD text suitable for the body of a
 * `<script type="application/ld+json">` tag.
 *
 * Every `<` is emitted as the JSON escape `\u003c`, so a string value that
 * contains `</script>` (or `<!--`) cannot terminate the surrounding script
 * element. The escaped text parses back to exactly the same data.
 *
 * @param schema - Schema object to serialize; any falsy value yields `null`.
 * @returns Pretty-printed (2-space) JSON, or `null` when there is nothing to
 *   serialize or the value has no JSON representation.
 */
export function serializeSchema(schema: any | null): string | null {
  if (!schema) {
    return null;
  }

  // JSON.stringify returns undefined at runtime for values such as functions.
  const json: unknown = JSON.stringify(schema, null, 2);
  if (typeof json !== 'string') {
    return null;
  }

  return json.replace(/</g, '\\u003c');
}
package/tsconfig.json CHANGED
@@ -7,6 +7,7 @@
7
7
  "strict": true,
8
8
  "skipLibCheck": true,
9
9
  "forceConsistentCasingInFileNames": true,
10
+ "jsx": "react-jsx",
10
11
  "outDir": "dist"
11
12
  },
12
13
  "include": [
package/tsup.config.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { defineConfig } from 'tsup';
2
2
 
3
3
  export default defineConfig({
4
- entry: ['src/index.ts', 'src/cli.ts', 'src/middleware.ts'],
4
+ entry: ['src/index.ts', 'src/cli.ts', 'src/middleware.ts', 'src/OntoHead.tsx', 'src/OntoProvider.tsx', 'src/config.ts', 'src/schemas.ts'],
5
5
  format: ['cjs', 'esm'],
6
6
  dts: true,
7
7
  splitting: false,
@@ -10,5 +10,5 @@ export default defineConfig({
10
10
  bundle: true,
11
11
  outDir: 'dist',
12
12
  minify: true,
13
- external: ['next'],
13
+ external: ['next', 'react'],
14
14
  });