@ontosdk/next 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ONTOROVIDER_USAGE.md +42 -83
- package/dist/cli.js +8 -6
- package/dist/cli.js.map +1 -1
- package/dist/cli.mjs +8 -6
- package/dist/cli.mjs.map +1 -1
- package/dist/config.js +2 -2
- package/dist/config.js.map +1 -1
- package/dist/config.mjs +2 -2
- package/dist/config.mjs.map +1 -1
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2 -2
- package/dist/index.mjs.map +1 -1
- package/dist/middleware.js +3 -3
- package/dist/middleware.js.map +1 -1
- package/dist/middleware.mjs +3 -3
- package/dist/middleware.mjs.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +34 -0
- package/src/config.ts +0 -16
- package/src/middleware.ts +7 -8
- package/dist/OntoHead.d.mts +0 -27
- package/dist/OntoHead.d.ts +0 -27
- package/dist/OntoProvider.d.mts +0 -52
- package/dist/OntoProvider.d.ts +0 -52
- package/dist/cli.d.mts +0 -1
- package/dist/cli.d.ts +0 -1
- package/dist/config.d.mts +0 -85
- package/dist/config.d.ts +0 -85
- package/dist/index.d.mts +0 -25
- package/dist/index.d.ts +0 -25
- package/dist/middleware.d.mts +0 -25
- package/dist/middleware.d.ts +0 -25
- package/dist/schemas.d.mts +0 -73
- package/dist/schemas.d.ts +0 -73
package/ONTOROVIDER_USAGE.md
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# OntoProvider Usage Guide
|
|
2
2
|
|
|
3
|
-
The `OntoProvider` component
|
|
3
|
+
The `OntoProvider` component is the high-level, "Zero-Config" entry point for AI optimization. It handles both **Auto-Discovery** (link tags) and **Semantic SEO** (JSON-LD schemas) in one wrapper.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
- **Automatic Link Tag Injection**:
|
|
8
|
-
- **
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
7
|
+
- **Automatic Link Tag Injection**: Adds `<link rel="alternate" type="text/markdown">` to every page.
|
|
8
|
+
- **JSON-LD Schema Injection**: Automatically generates structured data (Schema.org) for pages based on your `onto.config.ts`.
|
|
9
|
+
- **Dynamic Path Detection**: Uses `usePathname()` to construct full URLs for AI agents.
|
|
10
|
+
- **AIO Scoring Methodology**: Automatically injects methodology schemas for routes marked as `pageType: 'scoring'`.
|
|
11
11
|
|
|
12
12
|
## Installation
|
|
13
13
|
|
|
@@ -17,20 +17,17 @@ npm install @ontosdk/next
|
|
|
17
17
|
|
|
18
18
|
## Usage
|
|
19
19
|
|
|
20
|
-
### Basic
|
|
20
|
+
### 1. Basic Setup in Root Layout
|
|
21
21
|
|
|
22
|
-
Wrap your
|
|
22
|
+
Wrap your application and provide your `baseUrl`. For best results, also pass your `onto.config.ts` object.
|
|
23
23
|
|
|
24
24
|
```tsx
|
|
25
25
|
import { OntoProvider } from '@ontosdk/next/provider';
|
|
26
|
+
import config from '../onto.config';
|
|
26
27
|
|
|
27
|
-
export default function RootLayout({
|
|
28
|
-
children,
|
|
29
|
-
}: {
|
|
30
|
-
children: React.ReactNode;
|
|
31
|
-
}) {
|
|
28
|
+
export default function RootLayout({ children }) {
|
|
32
29
|
return (
|
|
33
|
-
<OntoProvider baseUrl="https://example.com">
|
|
30
|
+
<OntoProvider baseUrl="https://example.com" config={config}>
|
|
34
31
|
<html lang="en">
|
|
35
32
|
<head />
|
|
36
33
|
<body>{children}</body>
|
|
@@ -40,90 +37,52 @@ export default function RootLayout({
|
|
|
40
37
|
}
|
|
41
38
|
```
|
|
42
39
|
|
|
43
|
-
###
|
|
40
|
+
### 2. Page Type Configuration
|
|
44
41
|
|
|
45
|
-
|
|
42
|
+
In your `onto.config.ts`, you can specify `pageType` for automatic schema generation:
|
|
46
43
|
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
</html>
|
|
64
|
-
);
|
|
65
|
-
}
|
|
44
|
+
```typescript
|
|
45
|
+
const config: OntoConfig = {
|
|
46
|
+
// ... basic info
|
|
47
|
+
routes: [
|
|
48
|
+
{
|
|
49
|
+
path: '/pricing',
|
|
50
|
+
description: 'Pricing & Methodology',
|
|
51
|
+
pageType: 'scoring' // Triggers AIOMethodologySchema (AIO Scoring)
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
path: '/about',
|
|
55
|
+
description: 'About Us',
|
|
56
|
+
pageType: 'about' // Triggers Organization/AboutPage Schema
|
|
57
|
+
}
|
|
58
|
+
]
|
|
59
|
+
};
|
|
66
60
|
```
|
|
67
61
|
|
|
68
62
|
## How It Works
|
|
69
63
|
|
|
70
|
-
|
|
71
|
-
|
|
64
|
+
### Auto-Discovery
|
|
65
|
+
For a page at `/docs`, the provider injects:
|
|
72
66
|
```html
|
|
73
|
-
<link
|
|
74
|
-
rel="alternate"
|
|
75
|
-
type="text/markdown"
|
|
76
|
-
href="https://example.com/blog/my-post?format=md"
|
|
77
|
-
title="AI-optimized Markdown version"
|
|
78
|
-
/>
|
|
67
|
+
<link rel="alternate" type="text/markdown" href="https://example.com/docs?format=md" />
|
|
79
68
|
```
|
|
80
69
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
## Props
|
|
84
|
-
|
|
85
|
-
| Prop | Type | Required | Description |
|
|
86
|
-
|------|------|----------|-------------|
|
|
87
|
-
| `baseUrl` | `string` | Yes | The base URL of your site (e.g., `'https://example.com'`) |
|
|
88
|
-
| `children` | `ReactNode` | Yes | Child components to render |
|
|
70
|
+
### Semantic SEO (JSON-LD)
|
|
71
|
+
If the route matches a `pageType`, it injects a `<script type="application/ld+json">` with highly specific AI-friendly metadata. For example, a `scoring` page receives the **Onto AIO Methodology** schema, explaining your content negotiation and token efficiency weights to LLM crawlers.
|
|
89
72
|
|
|
90
73
|
## Comparison with OntoHead
|
|
91
74
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
-
|
|
75
|
+
| Feature | OntoHead | OntoProvider |
|
|
76
|
+
| :--- | :--- | :--- |
|
|
77
|
+
| **Path Pattern** | Static `/.onto/*.md` | Dynamic `?format=md` |
|
|
78
|
+
| **JSON-LD** | No | **Yes** |
|
|
79
|
+
| **Manifests** | Injects `llms.txt` link | No (handled by Middleware) |
|
|
80
|
+
| **Best For** | Static exports (CLI) | Dynamic & Meta-heavy sites |
|
|
96
81
|
|
|
97
|
-
|
|
98
|
-
- Uses dynamic query string pattern (e.g., `/blog/my-post?format=md`)
|
|
99
|
-
- Works with dynamic content
|
|
100
|
-
- Best for apps that handle markdown conversion at request time
|
|
82
|
+
## Requirements
|
|
101
83
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
Combine with Next.js middleware to handle the `?format=md` requests:
|
|
105
|
-
|
|
106
|
-
```typescript
|
|
107
|
-
// middleware.ts
|
|
108
|
-
import { NextResponse } from 'next/server';
|
|
109
|
-
import type { NextRequest } from 'next/server';
|
|
110
|
-
|
|
111
|
-
export function middleware(request: NextRequest) {
|
|
112
|
-
const { searchParams, pathname } = request.nextUrl;
|
|
113
|
-
|
|
114
|
-
if (searchParams.get('format') === 'md') {
|
|
115
|
-
// Convert the page to markdown and return it
|
|
116
|
-
// Implementation depends on your setup
|
|
117
|
-
return convertToMarkdown(pathname);
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
return NextResponse.next();
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
export const config = {
|
|
124
|
-
matcher: '/:path*',
|
|
125
|
-
};
|
|
126
|
-
```
|
|
84
|
+
- Next.js ≥14.0.0
|
|
85
|
+
- React ^18.0.0 || ^19.0.0
|
|
127
86
|
|
|
128
87
|
## License
|
|
129
88
|
|
package/dist/cli.js
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
"use strict";var F=Object.create;var O=Object.defineProperty;var
|
|
3
|
-
`)
|
|
2
|
+
"use strict";var F=Object.create;var O=Object.defineProperty;var L=Object.getOwnPropertyDescriptor;var T=Object.getOwnPropertyNames;var z=Object.getPrototypeOf,N=Object.prototype.hasOwnProperty;var E=(e,t,n,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let r of T(t))!N.call(e,r)&&r!==n&&O(e,r,{get:()=>t[r],enumerable:!(o=L(t,r))||o.enumerable});return e};var S=(e,t,n)=>(n=e!=null?F(z(e)):{},E(t||!e||!e.__esModule?O(n,"default",{value:e,enumerable:!0}):n,e));var b=require("glob"),a=S(require("fs")),l=S(require("path")),s=S(require("picocolors"));var k=S(require("cheerio")),P=S(require("turndown")),D=new P.default({headingStyle:"atx",codeBlockStyle:"fenced"});function j(e,t="Generated Output"){let n=e.length,o=k.load(e),r=o("title").text()||o("h1").first().text()||"Untitled Page",m=o('meta[name="description"]').attr("content")||"No description found.",f=[];o('script[type="application/ld+json"]').each((p,h)=>{try{let d=o(h).html()||"",x=JSON.parse(d);f.push(x)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let g="";o("main").length>0?g=o("main").html()||"":o("article").length>0?g=o("article").html()||"":g=o("body").html()||"";let $=D.turndown(g),i=[`# ${r}`,`> ${m}`,"",`**Source:** ${t}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
|
|
3
|
+
`)+$;f.length>0&&(i+=`
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
## Structured Data (JSON-LD)
|
|
7
7
|
\`\`\`json
|
|
8
|
-
`,
|
|
9
|
-
`}),
|
|
10
|
-
|
|
11
|
-
`))
|
|
8
|
+
`,f.forEach(p=>{i+=JSON.stringify(p,null,2)+`
|
|
9
|
+
`}),i+="```\n");let u=i.length,c=n>0?(n-u)/n*100:0;return{markdown:i,metadata:{title:r,description:m,jsonLd:f},stats:{originalHtmlSize:n,markdownSize:u,tokenReductionRatio:c}}}function v(e){let t=[];if(t.push(`# ${e.name}`),t.push(""),t.push(`> ${e.summary}`),t.push(""),e.routes&&e.routes.length>0){t.push("## Key Routes"),t.push("");for(let n of e.routes){let o=`${e.baseUrl}${n.path}`;t.push(`- [${n.path}](${o}): ${n.description}`)}t.push("")}if(e.externalLinks&&e.externalLinks.length>0){t.push("## Resources"),t.push("");for(let n of e.externalLinks)n.description?t.push(`- [${n.title}](${n.url}): ${n.description}`):t.push(`- [${n.title}](${n.url})`);t.push("")}if(e.sections&&e.sections.length>0)for(let n of e.sections)t.push(`## ${n.heading}`),t.push(""),t.push(n.content),t.push("");return t.join(`
|
|
10
|
+
`).trim()+`
|
|
11
|
+
`}async function K(){try{let t=await import("file://"+l.default.join(process.cwd(),"onto.config").replace(/\\/g,"/")+".ts");return t.default||t}catch{try{let n=await import("file://"+l.default.join(process.cwd(),"onto.config").replace(/\\/g,"/")+".js");return n.default||n}catch{return null}}}function H(){let e=l.default.join(process.cwd(),".env.local");a.default.existsSync(e)&&a.default.readFileSync(e,"utf8").split(/\r?\n/).forEach(n=>{let o=n.trim();if(!o||o.startsWith("#"))return;let[r,...m]=o.split("=");r&&m.length>0&&(process.env[r.trim()]=m.join("=").trim().replace(/^["']|["']$/g,""))})}async function _(){H(),console.log(s.default.cyan(`
|
|
12
|
+
[Onto] Starting Semantic Output Generation...`));let e=process.cwd(),t=l.default.join(e,".next/server/app"),n=l.default.join(e,"public/.onto");if(!a.default.existsSync(t)){console.log(s.default.yellow(`[Onto] Could not find Next.js app output at ${t}`)),console.log(s.default.yellow('[Onto] Ensure this is run after "next build" and you are using the App Router.'));return}let o=await(0,b.glob)("**/*.html",{cwd:t});if(o.length===0){console.log(s.default.yellow("[Onto] No static HTML files found to process."));return}a.default.existsSync(n)||a.default.mkdirSync(n,{recursive:!0});let r=0,m=0,f=0;for(let i of o){let u=l.default.join(t,i),c=i.replace(/\.html$/,".md"),p=l.default.join(n,c);try{let h=a.default.readFileSync(u,"utf8"),d=j(h,`/${c.replace(/\.md$/,"")}`),x=l.default.dirname(p);a.default.existsSync(x)||a.default.mkdirSync(x,{recursive:!0}),a.default.writeFileSync(p,d.markdown,"utf8"),r+=d.stats.originalHtmlSize,m+=d.stats.markdownSize,f++;let C=(d.stats.originalHtmlSize/1024).toFixed(1),R=(d.stats.markdownSize/1024).toFixed(1),y=i.replace(/\.html$/,"");y==="index"?y="/":y=`/${y}`,console.log(s.default.green("\u2713 Optimized")+s.default.dim(` ${y} `)+s.default.blue(`[${C}KB -> ${R}KB]`))}catch(h){console.error(s.default.red(`\u2717 Failed to process ${i}: ${h.message}`))}}console.log(s.default.bold(s.default.magenta(`Processed ${f} pages. Total Size: ${(r/1024).toFixed(1)}KB -> ${(m/1024).toFixed(1)}KB`)));let g=process.env.ONTO_API_KEY,$=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(g&&f>0){console.log(s.default.cyan(`[Onto] Syncing manifest with Control Plane [${$}]...`));try{let i=o.map(c=>{let p=c.replace(/\.html$/,""),h=p==="index"?"/":`/${p}`,d=l.default.join(n,c.replace(/\.html$/,".md"));return{route:h,filename:`${p}.md`,content:a.default.readFileSync(d,"utf8")}}),u=await fetch(`${$}/api/files`,{method:"POST",headers:{"x-onto-key":g,"Content-Type":"application/json"},body:JSON.stringify({files:i})});if(u.ok)console.log(s.default.green("\u2713 Control Plane sync successful"));else{let c=await u.json().catch(()=>({}));console.log(s.default.yellow(`\u26A0 Control Plane sync skipped: ${c.error||u.statusText}`))}}catch(i){console.log(s.default.yellow(`\u26A0 Control Plane sync failed: ${i.message}`))}}let w=await K();if(w){let i=v(w),u=l.default.join(e,"public/llms.txt"),c=l.default.join(e,"public");a.default.existsSync(c)||a.default.mkdirSync(c,{recursive:!0}),a.default.writeFileSync(u,i,"utf8"),console.log(s.default.green("\u2713 Generated")+s.default.dim(" /llms.txt"))}console.log(s.default.dim(`Edge payloads are ready at /public/.onto/*
|
|
13
|
+
`))}_().catch(e=>{console.error(s.default.red(`[Onto] Fatal Error: ${e.message}`)),process.exit(1)});
|
|
12
14
|
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/cli.ts","../src/extractor.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function main() {\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n"],"mappings":";wdACA,IAAAA,EAAqB,gBACrBC,EAAe,iBACfC,EAAiB,mBACjBC,EAAe,yBCJf,IAAAC,EAAyB,sBACzBC,EAA4B,uBAEtBC,EAAkB,IAAI,EAAAC,QAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASC,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWf,EAAgB,SAASc,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CDrGA,SAASC,GAAU,CACf,IAAMC,EAAU,EAAAC,QAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjD,EAAAC,QAAG,WAAWF,CAAO,GACF,EAAAE,QAAG,aAAaF,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQG,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClBR,EAAQ,EACR,QAAQ,IAAI,EAAAS,QAAG,KAAK;AAAA,8CAAiD,CAAC,EAEtE,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAgB,EAAAT,QAAK,KAAKQ,EAAK,kBAAkB,EACjDE,EAAgB,EAAAV,QAAK,KAAKQ,EAAK,cAAc,EAEnD,GAAI,CAAC,EAAAP,QAAG,WAAWQ,CAAa,EAAG,CAC/B,QAAQ,IAAI,EAAAF,QAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAI,EAAAF,QAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,QAAM,QAAK,YAAa,CAAE,IAAKF,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAI,EAAAJ,QAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGK,EAAAN,QAAG,WAAWS,CAAa,GAC5B,EAAAT,QAAG,UAAUS,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIE,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQJ,EAAO,CACtB,IAAMK,EAAY,EAAAhB,QAAK,KAAKS,EAAeM,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAa,EAAAlB,QAAK,KAAKU,EAAeO,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAc,EAAAlB,QAAG,aAAae,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAY,EAAAtB,QAAK,QAAQkB,CAAU,EACpC,EAAAjB,QAAG,WAAWqB,CAAS,GACxB,EAAArB,QAAG,UAAUqB,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C,EAAArB,QAAG,cAAciB,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJ,EAAAlB,QAAG,MAAM,kBAAa,EACtB,EAAAA,QAAG,IAAI,IAAIkB,CAAS,GAAG,EACvB,EAAAlB,QAAG,KAAK,IAAIgB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAM,EAAAnB,QAAG,IAAI,4BAAuBQ,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJ,EAAAnB,QAAG,KACC,EAAAA,QAAG,QAAQ,aAAaO,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAI,EAAAP,QAAG,KAAK,+CAA+CqB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWlB,EAAM,IAAII,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAAS,EAAA/B,QAAK,KAAKU,EAAeK,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAAS,EAAAxB,QAAG,aAAa8B,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI,EAAAzB,QAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM0B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI,EAAAzB,QAAG,OAAO,sCAAiC0B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAI,EAAAnB,QAAG,OAAO,qCAAgCmB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAEA,QAAQ,IAAI,EAAAnB,QAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAD,EAAK,EAAE,MAAM,GAAK,CACd,QAAQ,MAAM,EAAAC,QAAG,IAAI,uBAAuB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["import_glob","import_fs","import_path","import_picocolors","cheerio","import_turndown","turndownService","TurndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","loadEnv","envPath","path","fs","line","trimmedLine","key","valueParts","main","pc","cwd","nextAppDirDir","ontoPublicDir","files","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData"]}
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts","../src/extractor.ts","../src/config.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nasync function loadOntoConfig(): Promise<OntoConfig | null> {\r\n try {\r\n const configPath = path.join(process.cwd(), 'onto.config');\r\n // Node.js dynamic import\r\n const config = await import('file://' + configPath.replace(/\\\\/g, '/') + '.ts');\r\n return config.default || config;\r\n } catch (error) {\r\n try {\r\n const configPath = path.join(process.cwd(), 'onto.config');\r\n const config = await import('file://' + configPath.replace(/\\\\/g, '/') + '.js');\r\n return config.default || config;\r\n } catch (e) {\r\n return null;\r\n }\r\n }\r\n}\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function main() {\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n // --- Generate llms.txt manifest ---\r\n const config = await loadOntoConfig();\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const llmsTxtPath = path.join(cwd, 'public/llms.txt');\r\n \r\n // Ensure public dir exists\r\n const publicDir = path.join(cwd, 'public');\r\n if (!fs.existsSync(publicDir)) {\r\n fs.mkdirSync(publicDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(llmsTxtPath, llmsTxtContent, 'utf8');\r\n console.log(pc.green('✓ Generated') + pc.dim(' /llms.txt'));\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":";wdACA,IAAAA,EAAqB,gBACrBC,EAAe,iBACfC,EAAiB,mBACjBC,EAAe,yBCJf,IAAAC,EAAyB,sBACzBC,EAA4B,uBAEtBC,EAAkB,IAAI,EAAAC,QAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASC,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWf,EAAgB,SAASc,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CF9HA,eAAeK,GAA6C,CACxD,GAAI,CAGA,IAAMC,EAAS,MAAM,OAAO,UAFT,EAAAC,QAAK,KAAK,QAAQ,IAAI,EAAG,aAAa,EAEN,QAAQ,MAAO,GAAG,EAAI,OACzE,OAAOD,EAAO,SAAWA,CAC7B,MAAgB,CACZ,GAAI,CAEA,IAAMA,EAAS,MAAM,OAAO,UADT,EAAAC,QAAK,KAAK,QAAQ,IAAI,EAAG,aAAa,EACN,QAAQ,MAAO,GAAG,EAAI,OACzE,OAAOD,EAAO,SAAWA,CAC7B,MAAY,CACR,OAAO,IACX,CACJ,CACJ,CAGA,SAASE,GAAU,CACf,IAAMC,EAAU,EAAAF,QAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjD,EAAAG,QAAG,WAAWD,CAAO,GACF,EAAAC,QAAG,aAAaD,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQE,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClBP,EAAQ,EACR,QAAQ,IAAI,EAAAQ,QAAG,KAAK;AAAA,8CAAiD,CAAC,EAEtE,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAgB,EAAAX,QAAK,KAAKU,EAAK,kBAAkB,EACjDE,EAAgB,EAAAZ,QAAK,KAAKU,EAAK,cAAc,EAEnD,GAAI,CAAC,EAAAP,QAAG,WAAWQ,CAAa,EAAG,CAC/B,QAAQ,IAAI,EAAAF,QAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAI,EAAAF,QAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,QAAM,QAAK,YAAa,CAAE,IAAKF,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAI,EAAAJ,QAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGK,EAAAN,QAAG,WAAWS,CAAa,GAC5B,EAAAT,QAAG,UAAUS,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIE,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQJ,EAAO,CACtB,IAAMK,EAAY,EAAAlB,QAAK,KAAKW,EAAeM,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAa,EAAApB,QAAK,KAAKY,EAAeO,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAc,EAAAlB,QAAG,aAAae,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAY,EAAAxB,QAAK,QAAQoB,CAAU,EACpC,EAAAjB,QAAG,WAAWqB,CAAS,GACxB,EAAArB,QAAG,UAAUqB,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C,EAAArB,QAAG,cAAciB,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJ,EAAAlB,QAAG,MAAM,kBAAa,EACtB,EAAAA,QAAG,IAAI,IAAIkB,CAAS,GAAG,EACvB,EAAAlB,QAAG,KAAK,IAAIgB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAM,EAAAnB,QAAG,IAAI,4BAAuBQ,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJ,EAAAnB,QAAG,KACC,EAAAA,QAAG,QAAQ,aAAaO,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAI,EAAAP,QAAG,KAAK,+CAA+CqB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWlB,EAAM,IAAII,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAAS,EAAAjC,QAAK,KAAKY,EAAeK,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAAS,EAAAxB,QAAG,aAAa8B,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI,EAAAzB,QAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM0B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI,EAAAzB,QAAG,OAAO,sCAAiC0B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAI,EAAAnB,QAAG,OAAO,qCAAgCmB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAGA,IAAM7B,EAAS,MAAMD,EAAe,EACpC,GAAIC,EAAQ,CACR,IAAMqC,EAAiBC,EAAgBtC,CAAM,EACvCuC,EAAc,EAAAtC,QAAK,KAAKU,EAAK,iBAAiB,EAG9C6B,EAAY,EAAAvC,QAAK,KAAKU,EAAK,QAAQ,EACpC,EAAAP,QAAG,WAAWoC,CAAS,GACxB,EAAApC,QAAG,UAAUoC,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/C,EAAApC,QAAG,cAAcmC,EAAaF,EAAgB,MAAM,EACpD,QAAQ,IAAI,EAAA3B,QAAG,MAAM,kBAAa,EAAI,EAAAA,QAAG,IAAI,YAAY,CAAC,CAC9D,CAEA,QAAQ,IAAI,EAAAA,QAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAD,EAAK,EAAE,MAAM,GAAK,CACd,QAAQ,MAAM,EAAAC,QAAG,IAAI,uBAAuB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["import_glob","import_fs","import_path","import_picocolors","cheerio","import_turndown","turndownService","TurndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","loadOntoConfig","config","path","loadEnv","envPath","fs","line","trimmedLine","key","valueParts","main","pc","cwd","nextAppDirDir","ontoPublicDir","files","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData","llmsTxtContent","generateLlmsTxt","llmsTxtPath","publicDir"]}
|
package/dist/cli.mjs
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{glob as
|
|
3
|
-
`)+S;
|
|
2
|
+
import{glob as C}from"glob";import r from"fs";import c from"path";import s from"picocolors";import*as w from"cheerio";import v from"turndown";var b=new v({headingStyle:"atx",codeBlockStyle:"fenced"});function O(n,t="Generated Output"){let e=n.length,o=w.load(n),d=o("title").text()||o("h1").first().text()||"Untitled Page",m=o('meta[name="description"]').attr("content")||"No description found.",f=[];o('script[type="application/ld+json"]').each((u,h)=>{try{let p=o(h).html()||"",$=JSON.parse(p);f.push($)}catch{}}),o("script, style, noscript, iframe, svg, nav, footer, meta, link, header").remove();let g="";o("main").length>0?g=o("main").html()||"":o("article").length>0?g=o("article").html()||"":g=o("body").html()||"";let S=b.turndown(g),i=[`# ${d}`,`> ${m}`,"",`**Source:** ${t}`,`**Extracted:** ${new Date().toISOString()}`,"","---",""].join(`
|
|
3
|
+
`)+S;f.length>0&&(i+=`
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
## Structured Data (JSON-LD)
|
|
7
7
|
\`\`\`json
|
|
8
|
-
`,
|
|
9
|
-
`}),
|
|
10
|
-
|
|
11
|
-
`))
|
|
8
|
+
`,f.forEach(u=>{i+=JSON.stringify(u,null,2)+`
|
|
9
|
+
`}),i+="```\n");let l=i.length,a=e>0?(e-l)/e*100:0;return{markdown:i,metadata:{title:d,description:m,jsonLd:f},stats:{originalHtmlSize:e,markdownSize:l,tokenReductionRatio:a}}}function k(n){let t=[];if(t.push(`# ${n.name}`),t.push(""),t.push(`> ${n.summary}`),t.push(""),n.routes&&n.routes.length>0){t.push("## Key Routes"),t.push("");for(let e of n.routes){let o=`${n.baseUrl}${e.path}`;t.push(`- [${e.path}](${o}): ${e.description}`)}t.push("")}if(n.externalLinks&&n.externalLinks.length>0){t.push("## Resources"),t.push("");for(let e of n.externalLinks)e.description?t.push(`- [${e.title}](${e.url}): ${e.description}`):t.push(`- [${e.title}](${e.url})`);t.push("")}if(n.sections&&n.sections.length>0)for(let e of n.sections)t.push(`## ${e.heading}`),t.push(""),t.push(e.content),t.push("");return t.join(`
|
|
10
|
+
`).trim()+`
|
|
11
|
+
`}async function R(){try{let t=await import("file://"+c.join(process.cwd(),"onto.config").replace(/\\/g,"/")+".ts");return t.default||t}catch{try{let e=await import("file://"+c.join(process.cwd(),"onto.config").replace(/\\/g,"/")+".js");return e.default||e}catch{return null}}}function F(){let n=c.join(process.cwd(),".env.local");r.existsSync(n)&&r.readFileSync(n,"utf8").split(/\r?\n/).forEach(e=>{let o=e.trim();if(!o||o.startsWith("#"))return;let[d,...m]=o.split("=");d&&m.length>0&&(process.env[d.trim()]=m.join("=").trim().replace(/^["']|["']$/g,""))})}async function L(){F(),console.log(s.cyan(`
|
|
12
|
+
[Onto] Starting Semantic Output Generation...`));let n=process.cwd(),t=c.join(n,".next/server/app"),e=c.join(n,"public/.onto");if(!r.existsSync(t)){console.log(s.yellow(`[Onto] Could not find Next.js app output at ${t}`)),console.log(s.yellow('[Onto] Ensure this is run after "next build" and you are using the App Router.'));return}let o=await C("**/*.html",{cwd:t});if(o.length===0){console.log(s.yellow("[Onto] No static HTML files found to process."));return}r.existsSync(e)||r.mkdirSync(e,{recursive:!0});let d=0,m=0,f=0;for(let i of o){let l=c.join(t,i),a=i.replace(/\.html$/,".md"),u=c.join(e,a);try{let h=r.readFileSync(l,"utf8"),p=O(h,`/${a.replace(/\.md$/,"")}`),$=c.dirname(u);r.existsSync($)||r.mkdirSync($,{recursive:!0}),r.writeFileSync(u,p.markdown,"utf8"),d+=p.stats.originalHtmlSize,m+=p.stats.markdownSize,f++;let P=(p.stats.originalHtmlSize/1024).toFixed(1),j=(p.stats.markdownSize/1024).toFixed(1),y=i.replace(/\.html$/,"");y==="index"?y="/":y=`/${y}`,console.log(s.green("\u2713 Optimized")+s.dim(` ${y} `)+s.blue(`[${P}KB -> ${j}KB]`))}catch(h){console.error(s.red(`\u2717 Failed to process ${i}: ${h.message}`))}}console.log(s.bold(s.magenta(`Processed ${f} pages. Total Size: ${(d/1024).toFixed(1)}KB -> ${(m/1024).toFixed(1)}KB`)));let g=process.env.ONTO_API_KEY,S=process.env.ONTO_DASHBOARD_URL||"https://app.buildonto.dev";if(g&&f>0){console.log(s.cyan(`[Onto] Syncing manifest with Control Plane [${S}]...`));try{let i=o.map(a=>{let u=a.replace(/\.html$/,""),h=u==="index"?"/":`/${u}`,p=c.join(e,a.replace(/\.html$/,".md"));return{route:h,filename:`${u}.md`,content:r.readFileSync(p,"utf8")}}),l=await fetch(`${S}/api/files`,{method:"POST",headers:{"x-onto-key":g,"Content-Type":"application/json"},body:JSON.stringify({files:i})});if(l.ok)console.log(s.green("\u2713 Control Plane sync successful"));else{let a=await l.json().catch(()=>({}));console.log(s.yellow(`\u26A0 Control Plane sync skipped: ${a.error||l.statusText}`))}}catch(i){console.log(s.yellow(`\u26A0 Control Plane sync failed: ${i.message}`))}}let x=await R();if(x){let i=k(x),l=c.join(n,"public/llms.txt"),a=c.join(n,"public");r.existsSync(a)||r.mkdirSync(a,{recursive:!0}),r.writeFileSync(l,i,"utf8"),console.log(s.green("\u2713 Generated")+s.dim(" /llms.txt"))}console.log(s.dim(`Edge payloads are ready at /public/.onto/*
|
|
13
|
+
`))}L().catch(n=>{console.error(s.red(`[Onto] Fatal Error: ${n.message}`)),process.exit(1)});
|
|
12
14
|
//# sourceMappingURL=cli.mjs.map
|
package/dist/cli.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/cli.ts","../src/extractor.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function main() {\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n"],"mappings":";AACA,OAAS,QAAAA,MAAY,OACrB,OAAOC,MAAQ,KACf,OAAOC,MAAU,OACjB,OAAOC,MAAQ,aCJf,UAAYC,MAAa,UACzB,OAAOC,MAAqB,WAE5B,IAAMC,EAAkB,IAAID,EAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASE,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWd,EAAgB,SAASa,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CDrGA,SAASC,GAAU,CACf,IAAMC,EAAUC,EAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjDC,EAAG,WAAWF,CAAO,GACFE,EAAG,aAAaF,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQG,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClBR,EAAQ,EACR,QAAQ,IAAIS,EAAG,KAAK;AAAA,8CAAiD,CAAC,EAEtE,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAgBT,EAAK,KAAKQ,EAAK,kBAAkB,EACjDE,EAAgBV,EAAK,KAAKQ,EAAK,cAAc,EAEnD,GAAI,CAACP,EAAG,WAAWQ,CAAa,EAAG,CAC/B,QAAQ,IAAIF,EAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAIF,EAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,MAAMC,EAAK,YAAa,CAAE,IAAKH,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAIJ,EAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGKN,EAAG,WAAWS,CAAa,GAC5BT,EAAG,UAAUS,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIG,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQL,EAAO,CACtB,IAAMM,EAAYjB,EAAK,KAAKS,EAAeO,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAanB,EAAK,KAAKU,EAAeQ,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAcnB,EAAG,aAAagB,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAYvB,EAAK,QAAQmB,CAAU,EACpClB,EAAG,WAAWsB,CAAS,GACxBtB,EAAG,UAAUsB,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/CtB,EAAG,cAAckB,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJnB,EAAG,MAAM,kBAAa,EACtBA,EAAG,IAAI,IAAImB,CAAS,GAAG,EACvBnB,EAAG,KAAK,IAAIiB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAMpB,EAAG,IAAI,4BAAuBS,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJpB,EAAG,KACCA,EAAG,QAAQ,aAAaQ,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAIR,EAAG,KAAK,+CAA+CsB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWnB,EAAM,IAAIK,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAAShC,EAAK,KAAKU,EAAeM,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAASzB,EAAG,aAAa+B,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI1B,EAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM2B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI1B,EAAG,OAAO,sCAAiC2B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAIpB,EAAG,OAAO,qCAAgCoB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAEA,QAAQ,IAAIpB,EAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAD,EAAK,EAAE,MAAMqB,GAAK,CACd,QAAQ,MAAMpB,EAAG,IAAI,uBAAuBoB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["glob","fs","path","pc","cheerio","TurndownService","turndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","loadEnv","envPath","path","fs","line","trimmedLine","key","valueParts","main","pc","cwd","nextAppDirDir","ontoPublicDir","files","glob","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData"]}
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts","../src/extractor.ts","../src/config.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { glob } from 'glob';\r\nimport fs from 'fs';\r\nimport path from 'path';\r\nimport pc from 'picocolors';\r\nimport { extractContent } from './extractor';\r\nimport { generateLlmsTxt, OntoConfig } from './config';\r\n\r\nasync function loadOntoConfig(): Promise<OntoConfig | null> {\r\n try {\r\n const configPath = path.join(process.cwd(), 'onto.config');\r\n // Node.js dynamic import\r\n const config = await import('file://' + configPath.replace(/\\\\/g, '/') + '.ts');\r\n return config.default || config;\r\n } catch (error) {\r\n try {\r\n const configPath = path.join(process.cwd(), 'onto.config');\r\n const config = await import('file://' + configPath.replace(/\\\\/g, '/') + '.js');\r\n return config.default || config;\r\n } catch (e) {\r\n return null;\r\n }\r\n }\r\n}\r\n\r\n// Simple helper to load .env.local from the current working directory\r\nfunction loadEnv() {\r\n const envPath = path.join(process.cwd(), '.env.local');\r\n if (fs.existsSync(envPath)) {\r\n const envContent = fs.readFileSync(envPath, 'utf8');\r\n envContent.split(/\\r?\\n/).forEach(line => {\r\n const trimmedLine = line.trim();\r\n if (!trimmedLine || trimmedLine.startsWith('#')) return;\r\n const [key, ...valueParts] = trimmedLine.split('=');\r\n if (key && valueParts.length > 0) {\r\n process.env[key.trim()] = valueParts.join('=').trim().replace(/^[\"']|[\"']$/g, '');\r\n }\r\n });\r\n }\r\n}\r\n\r\nasync function main() {\r\n loadEnv();\r\n console.log(pc.cyan('\\n[Onto] Starting Semantic Output Generation...'));\r\n\r\n const cwd = process.cwd();\r\n const nextAppDirDir = path.join(cwd, '.next/server/app');\r\n const ontoPublicDir = path.join(cwd, 'public/.onto');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n console.log(pc.yellow(`[Onto] Could not find Next.js app output at ${nextAppDirDir}`));\r\n console.log(pc.yellow(`[Onto] Ensure this is run after \"next build\" and you are using the App Router.`));\r\n return;\r\n }\r\n\r\n // Find all HTML files rendered by Next.js in the app directory\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n\r\n if (files.length === 0) {\r\n console.log(pc.yellow(`[Onto] No static HTML files found to process.`));\r\n return;\r\n }\r\n\r\n // Ensure output directory exists\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalOriginalSize = 0;\r\n let totalMarkdownSize = 0;\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n\r\n // We map file path e.g. \"pricing.html\" to \"pricing.md\", or \"blog/post.html\" to \"blog/post.md\"\r\n let outputPathRelative = file.replace(/\\.html$/, '.md');\r\n // If it's a dynamic route page, or purely root index.html\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n const result = extractContent(htmlContent, `/${outputPathRelative.replace(/\\.md$/, '')}`);\r\n\r\n // Ensure specific sub-directory exists (e.g., for blog/post.md)\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n\r\n totalOriginalSize += result.stats.originalHtmlSize;\r\n totalMarkdownSize += result.stats.markdownSize;\r\n totalFilesProcessed++;\r\n\r\n const origKb = (result.stats.originalHtmlSize / 1024).toFixed(1);\r\n const mdKb = (result.stats.markdownSize / 1024).toFixed(1);\r\n\r\n // /index.html -> /\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n console.log(\r\n pc.green(`✓ Optimized`) +\r\n pc.dim(` ${routeName} `) +\r\n pc.blue(`[${origKb}KB -> ${mdKb}KB]`)\r\n );\r\n } catch (e: any) {\r\n console.error(pc.red(`✗ Failed to process ${file}: ${e.message}`));\r\n }\r\n }\r\n\r\n console.log(\r\n pc.bold(\r\n pc.magenta(`Processed ${totalFilesProcessed} pages. Total Size: ${(totalOriginalSize / 1024).toFixed(1)}KB -> ${(totalMarkdownSize / 1024).toFixed(1)}KB`)\r\n )\r\n );\r\n\r\n // Sync with Onto Control Plane (Premium)\r\n const ONTO_API_KEY = process.env.ONTO_API_KEY;\r\n const DASHBOARD_URL = process.env.ONTO_DASHBOARD_URL || 'https://app.buildonto.dev';\r\n\r\n if (ONTO_API_KEY && totalFilesProcessed > 0) {\r\n console.log(pc.cyan(`[Onto] Syncing manifest with Control Plane [${DASHBOARD_URL}]...`));\r\n try {\r\n const manifest = files.map(file => {\r\n const routeName = file.replace(/\\.html$/, '');\r\n const route = routeName === 'index' ? '/' : `/${routeName}`;\r\n const mdPath = path.join(ontoPublicDir, file.replace(/\\.html$/, '.md'));\r\n return {\r\n route,\r\n filename: `${routeName}.md`,\r\n content: fs.readFileSync(mdPath, 'utf8')\r\n };\r\n });\r\n\r\n const res = await fetch(`${DASHBOARD_URL}/api/files`, {\r\n method: 'POST',\r\n headers: {\r\n 'x-onto-key': ONTO_API_KEY,\r\n 'Content-Type': 'application/json'\r\n },\r\n body: JSON.stringify({ files: manifest })\r\n });\r\n\r\n if (res.ok) {\r\n console.log(pc.green('✓ Control Plane sync successful'));\r\n } else {\r\n const errData = await res.json().catch(() => ({}));\r\n console.log(pc.yellow(`⚠ Control Plane sync skipped: ${errData.error || res.statusText}`));\r\n }\r\n } catch (e: any) {\r\n console.log(pc.yellow(`⚠ Control Plane sync failed: ${e.message}`));\r\n }\r\n }\r\n\r\n // --- Generate llms.txt manifest ---\r\n const config = await loadOntoConfig();\r\n if (config) {\r\n const llmsTxtContent = generateLlmsTxt(config);\r\n const llmsTxtPath = path.join(cwd, 'public/llms.txt');\r\n \r\n // Ensure public dir exists\r\n const publicDir = path.join(cwd, 'public');\r\n if (!fs.existsSync(publicDir)) {\r\n fs.mkdirSync(publicDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(llmsTxtPath, llmsTxtContent, 'utf8');\r\n console.log(pc.green('✓ Generated') + pc.dim(' /llms.txt'));\r\n }\r\n\r\n console.log(pc.dim(`Edge payloads are ready at /public/.onto/*\\n`));\r\n}\r\n\r\nmain().catch(e => {\r\n console.error(pc.red(`[Onto] Fatal Error: ${e.message}`));\r\n process.exit(1);\r\n});\r\n","import * as cheerio from 'cheerio';\r\nimport TurndownService from 'turndown';\r\n\r\nconst turndownService = new TurndownService({\r\n headingStyle: 'atx',\r\n codeBlockStyle: 'fenced',\r\n});\r\n\r\n// Configure turndown to keep some layout or handle semantic tags differently if needed\r\n\r\nexport interface ExtractionResult {\r\n markdown: string;\r\n metadata: {\r\n title: string;\r\n description: string;\r\n jsonLd: any[];\r\n };\r\n stats: {\r\n originalHtmlSize: number;\r\n markdownSize: number;\r\n tokenReductionRatio: number;\r\n };\r\n}\r\n\r\n/**\r\n * Extracts pure semantic markdown and metadata from rendered Next.js HTML strings.\r\n * @param html The raw HTML string.\r\n * @param sourceUrl (Optional) the URL this was generated from, to attach as metadata.\r\n * @returns {ExtractionResult} The extracted payload.\r\n */\r\nexport function extractContent(html: string, sourceUrl: string = 'Generated Output'): ExtractionResult {\r\n const originalSize = html.length;\r\n\r\n const $ = cheerio.load(html);\r\n\r\n // 1. Extract Metadata BEFORE removing structure\r\n const title = $('title').text() || $('h1').first().text() || 'Untitled Page';\r\n const description = $('meta[name=\"description\"]').attr('content') || 'No description found.';\r\n\r\n const jsonLdScripts: any[] = [];\r\n $('script[type=\"application/ld+json\"]').each((_, el) => {\r\n try {\r\n const raw = $(el).html() || '';\r\n const parsed = JSON.parse(raw);\r\n jsonLdScripts.push(parsed);\r\n } catch {\r\n // ignore bad json\r\n }\r\n });\r\n\r\n // 2. Strip noise (React boilerplate, styles, unnecessary tags)\r\n $('script, style, noscript, iframe, svg, nav, footer, meta, link, header').remove();\r\n\r\n // Optionally remove typical Next.js hidden wrappers if they don't contain real content.\r\n // Next.js uses <div id=\"__next\"> but we mostly just want semantic content.\r\n\r\n // 3. Find the entry point for content\r\n // Prefer <main> or <article> over <body>\r\n let contentHtml = '';\r\n if ($('main').length > 0) {\r\n contentHtml = $('main').html() || '';\r\n } else if ($('article').length > 0) {\r\n contentHtml = $('article').html() || '';\r\n } else {\r\n contentHtml = $('body').html() || '';\r\n }\r\n\r\n // 4. Convert to Markdown\r\n let markdown = turndownService.turndown(contentHtml);\r\n\r\n // 5. Optionally inject Metadata header\r\n const headerLines = [\r\n `# ${title}`,\r\n `> ${description}`,\r\n ``,\r\n `**Source:** ${sourceUrl}`,\r\n `**Extracted:** ${new Date().toISOString()}`,\r\n ``,\r\n `---`,\r\n ``\r\n ];\r\n\r\n let finalMarkdown = headerLines.join('\\n') + markdown;\r\n\r\n // Add JSON-LD section if exists\r\n if (jsonLdScripts.length > 0) {\r\n finalMarkdown += '\\n\\n---\\n## Structured Data (JSON-LD)\\n```json\\n';\r\n jsonLdScripts.forEach(j => {\r\n finalMarkdown += JSON.stringify(j, null, 2) + '\\n';\r\n });\r\n finalMarkdown += '```\\n';\r\n }\r\n\r\n const markdownSize = finalMarkdown.length;\r\n const tokenReductionRatio = originalSize > 0 ? ((originalSize - markdownSize) / originalSize) * 100 : 0;\r\n\r\n return {\r\n markdown: finalMarkdown,\r\n metadata: {\r\n title,\r\n description,\r\n jsonLd: jsonLdScripts\r\n },\r\n stats: {\r\n originalHtmlSize: originalSize,\r\n markdownSize,\r\n tokenReductionRatio\r\n }\r\n };\r\n}\r\n\r\nexport async function generateStaticPayloads(nextAppDirDir: string, ontoPublicDir: string) {\r\n const fs = await import('fs');\r\n const path = await import('path');\r\n const { glob } = await import('glob');\r\n\r\n if (!fs.existsSync(nextAppDirDir)) {\r\n return;\r\n }\r\n\r\n const files = await glob('**/*.html', { cwd: nextAppDirDir });\r\n if (files.length === 0) return;\r\n\r\n if (!fs.existsSync(ontoPublicDir)) {\r\n fs.mkdirSync(ontoPublicDir, { recursive: true });\r\n }\r\n\r\n let totalFilesProcessed = 0;\r\n\r\n for (const file of files) {\r\n const inputPath = path.join(nextAppDirDir, file);\r\n const outputPathRelative = file.replace(/\\.html$/, '.md');\r\n const outputPath = path.join(ontoPublicDir, outputPathRelative);\r\n\r\n try {\r\n const htmlContent = fs.readFileSync(inputPath, 'utf8');\r\n\r\n let routeName = file.replace(/\\.html$/, '');\r\n if (routeName === 'index') routeName = '/';\r\n else routeName = `/${routeName}`;\r\n\r\n const result = extractContent(htmlContent, routeName);\r\n\r\n const outputDir = path.dirname(outputPath);\r\n if (!fs.existsSync(outputDir)) {\r\n fs.mkdirSync(outputDir, { recursive: true });\r\n }\r\n\r\n fs.writeFileSync(outputPath, result.markdown, 'utf8');\r\n totalFilesProcessed++;\r\n } catch (e: any) {\r\n console.error(`[Onto] Failed to process ${file}: ${e.message}`);\r\n }\r\n }\r\n console.log(`[Onto] Successfully generated ${totalFilesProcessed} semantic markdown endpoints.`);\r\n}\r\n","/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":";AACA,OAAS,QAAAA,MAAY,OACrB,OAAOC,MAAQ,KACf,OAAOC,MAAU,OACjB,OAAOC,MAAQ,aCJf,UAAYC,MAAa,UACzB,OAAOC,MAAqB,WAE5B,IAAMC,EAAkB,IAAID,EAAgB,CACxC,aAAc,MACd,eAAgB,QACpB,CAAC,EAwBM,SAASE,EAAeC,EAAcC,EAAoB,mBAAsC,CACnG,IAAMC,EAAeF,EAAK,OAEpBG,EAAY,OAAKH,CAAI,EAGrBI,EAAQD,EAAE,OAAO,EAAE,KAAK,GAAKA,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,GAAK,gBACvDE,EAAcF,EAAE,0BAA0B,EAAE,KAAK,SAAS,GAAK,wBAE/DG,EAAuB,CAAC,EAC9BH,EAAE,oCAAoC,EAAE,KAAK,CAACI,EAAGC,IAAO,CACpD,GAAI,CACA,IAAMC,EAAMN,EAAEK,CAAE,EAAE,KAAK,GAAK,GACtBE,EAAS,KAAK,MAAMD,CAAG,EAC7BH,EAAc,KAAKI,CAAM,CAC7B,MAAQ,CAER,CACJ,CAAC,EAGDP,EAAE,uEAAuE,EAAE,OAAO,EAOlF,IAAIQ,EAAc,GACdR,EAAE,MAAM,EAAE,OAAS,EACnBQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAC3BA,EAAE,SAAS,EAAE,OAAS,EAC7BQ,EAAcR,EAAE,SAAS,EAAE,KAAK,GAAK,GAErCQ,EAAcR,EAAE,MAAM,EAAE,KAAK,GAAK,GAItC,IAAIS,EAAWd,EAAgB,SAASa,CAAW,EAc/CE,EAXgB,CAChB,KAAKT,CAAK,GACV,KAAKC,CAAW,GAChB,GACA,eAAeJ,CAAS,GACxB,kBAAkB,IAAI,KAAK,EAAE,YAAY,CAAC,GAC1C,GACA,MACA,EACJ,EAEgC,KAAK;AAAA,CAAI,EAAIW,EAGzCN,EAAc,OAAS,IACvBO,GAAiB;AAAA;AAAA;AAAA;AAAA;AAAA,EACjBP,EAAc,QAAQQ,GAAK,CACvBD,GAAiB,KAAK,UAAUC,EAAG,KAAM,CAAC,EAAI;AAAA,CAClD,CAAC,EACDD,GAAiB,SAGrB,IAAME,EAAeF,EAAc,OAC7BG,EAAsBd,EAAe,GAAMA,EAAea,GAAgBb,EAAgB,IAAM,EAEtG,MAAO,CACH,SAAUW,EACV,SAAU,CACN,MAAAT,EACA,YAAAC,EACA,OAAQC,CACZ,EACA,MAAO,CACH,iBAAkBJ,EAClB,aAAAa,EACA,oBAAAC,CACJ,CACJ,CACJ,CCtBO,SAASC,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC,CF9HA,eAAeK,GAA6C,CACxD,GAAI,CAGA,IAAMC,EAAS,MAAM,OAAO,UAFTC,EAAK,KAAK,QAAQ,IAAI,EAAG,aAAa,EAEN,QAAQ,MAAO,GAAG,EAAI,OACzE,OAAOD,EAAO,SAAWA,CAC7B,MAAgB,CACZ,GAAI,CAEA,IAAMA,EAAS,MAAM,OAAO,UADTC,EAAK,KAAK,QAAQ,IAAI,EAAG,aAAa,EACN,QAAQ,MAAO,GAAG,EAAI,OACzE,OAAOD,EAAO,SAAWA,CAC7B,MAAY,CACR,OAAO,IACX,CACJ,CACJ,CAGA,SAASE,GAAU,CACf,IAAMC,EAAUF,EAAK,KAAK,QAAQ,IAAI,EAAG,YAAY,EACjDG,EAAG,WAAWD,CAAO,GACFC,EAAG,aAAaD,EAAS,MAAM,EACvC,MAAM,OAAO,EAAE,QAAQE,GAAQ,CACtC,IAAMC,EAAcD,EAAK,KAAK,EAC9B,GAAI,CAACC,GAAeA,EAAY,WAAW,GAAG,EAAG,OACjD,GAAM,CAACC,EAAK,GAAGC,CAAU,EAAIF,EAAY,MAAM,GAAG,EAC9CC,GAAOC,EAAW,OAAS,IAC3B,QAAQ,IAAID,EAAI,KAAK,CAAC,EAAIC,EAAW,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,eAAgB,EAAE,EAExF,CAAC,CAET,CAEA,eAAeC,GAAO,CAClBP,EAAQ,EACR,QAAQ,IAAIQ,EAAG,KAAK;AAAA,8CAAiD,CAAC,EAEtE,IAAMC,EAAM,QAAQ,IAAI,EAClBC,EAAgBX,EAAK,KAAKU,EAAK,kBAAkB,EACjDE,EAAgBZ,EAAK,KAAKU,EAAK,cAAc,EAEnD,GAAI,CAACP,EAAG,WAAWQ,CAAa,EAAG,CAC/B,QAAQ,IAAIF,EAAG,OAAO,+CAA+CE,CAAa,EAAE,CAAC,EACrF,QAAQ,IAAIF,EAAG,OAAO,gFAAgF,CAAC,EACvG,MACJ,CAGA,IAAMI,EAAQ,MAAMC,EAAK,YAAa,CAAE,IAAKH,CAAc,CAAC,EAE5D,GAAIE,EAAM,SAAW,EAAG,CACpB,QAAQ,IAAIJ,EAAG,OAAO,+CAA+C,CAAC,EACtE,MACJ,CAGKN,EAAG,WAAWS,CAAa,GAC5BT,EAAG,UAAUS,EAAe,CAAE,UAAW,EAAK,CAAC,EAGnD,IAAIG,EAAoB,EACpBC,EAAoB,EACpBC,EAAsB,EAE1B,QAAWC,KAAQL,EAAO,CACtB,IAAMM,EAAYnB,EAAK,KAAKW,EAAeO,CAAI,EAG3CE,EAAqBF,EAAK,QAAQ,UAAW,KAAK,EAEhDG,EAAarB,EAAK,KAAKY,EAAeQ,CAAkB,EAE9D,GAAI,CACA,IAAME,EAAcnB,EAAG,aAAagB,EAAW,MAAM,EAE/CI,EAASC,EAAeF,EAAa,IAAIF,EAAmB,QAAQ,QAAS,EAAE,CAAC,EAAE,EAGlFK,EAAYzB,EAAK,QAAQqB,CAAU,EACpClB,EAAG,WAAWsB,CAAS,GACxBtB,EAAG,UAAUsB,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/CtB,EAAG,cAAckB,EAAYE,EAAO,SAAU,MAAM,EAEpDR,GAAqBQ,EAAO,MAAM,iBAClCP,GAAqBO,EAAO,MAAM,aAClCN,IAEA,IAAMS,GAAUH,EAAO,MAAM,iBAAmB,MAAM,QAAQ,CAAC,EACzDI,GAAQJ,EAAO,MAAM,aAAe,MAAM,QAAQ,CAAC,EAGrDK,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCU,IAAc,QAASA,EAAY,IAClCA,EAAY,IAAIA,CAAS,GAE9B,QAAQ,IACJnB,EAAG,MAAM,kBAAa,EACtBA,EAAG,IAAI,IAAImB,CAAS,GAAG,EACvBnB,EAAG,KAAK,IAAIiB,CAAM,SAASC,CAAI,KAAK,CACxC,CACJ,OAASE,EAAQ,CACb,QAAQ,MAAMpB,EAAG,IAAI,4BAAuBS,CAAI,KAAKW,EAAE,OAAO,EAAE,CAAC,CACrE,CACJ,CAEA,QAAQ,IACJpB,EAAG,KACCA,EAAG,QAAQ,aAAaQ,CAAmB,wBAAwBF,EAAoB,MAAM,QAAQ,CAAC,CAAC,UAAUC,EAAoB,MAAM,QAAQ,CAAC,CAAC,IAAI,CAC7J,CACJ,EAGA,IAAMc,EAAe,QAAQ,IAAI,aAC3BC,EAAgB,QAAQ,IAAI,oBAAsB,4BAExD,GAAID,GAAgBb,EAAsB,EAAG,CACzC,QAAQ,IAAIR,EAAG,KAAK,+CAA+CsB,CAAa,MAAM,CAAC,EACvF,GAAI,CACA,IAAMC,EAAWnB,EAAM,IAAIK,GAAQ,CAC/B,IAAMU,EAAYV,EAAK,QAAQ,UAAW,EAAE,EACtCe,EAAQL,IAAc,QAAU,IAAM,IAAIA,CAAS,GACnDM,EAASlC,EAAK,KAAKY,EAAeM,EAAK,QAAQ,UAAW,KAAK,CAAC,EACtE,MAAO,CACH,MAAAe,EACA,SAAU,GAAGL,CAAS,MACtB,QAASzB,EAAG,aAAa+B,EAAQ,MAAM,CAC3C,CACJ,CAAC,EAEKC,EAAM,MAAM,MAAM,GAAGJ,CAAa,aAAc,CAClD,OAAQ,OACR,QAAS,CACL,aAAcD,EACd,eAAgB,kBACpB,EACA,KAAM,KAAK,UAAU,CAAE,MAAOE,CAAS,CAAC,CAC5C,CAAC,EAED,GAAIG,EAAI,GACJ,QAAQ,IAAI1B,EAAG,MAAM,sCAAiC,CAAC,MACpD,CACH,IAAM2B,EAAU,MAAMD,EAAI,KAAK,EAAE,MAAM,KAAO,CAAC,EAAE,EACjD,QAAQ,IAAI1B,EAAG,OAAO,sCAAiC2B,EAAQ,OAASD,EAAI,UAAU,EAAE,CAAC,CAC7F,CACJ,OAASN,EAAQ,CACb,QAAQ,IAAIpB,EAAG,OAAO,qCAAgCoB,EAAE,OAAO,EAAE,CAAC,CACtE,CACJ,CAGA,IAAM9B,EAAS,MAAMD,EAAe,EACpC,GAAIC,EAAQ,CACR,IAAMsC,EAAiBC,EAAgBvC,CAAM,EACvCwC,EAAcvC,EAAK,KAAKU,EAAK,iBAAiB,EAG9C8B,EAAYxC,EAAK,KAAKU,EAAK,QAAQ,EACpCP,EAAG,WAAWqC,CAAS,GACxBrC,EAAG,UAAUqC,EAAW,CAAE,UAAW,EAAK,CAAC,EAG/CrC,EAAG,cAAcoC,EAAaF,EAAgB,MAAM,EACpD,QAAQ,IAAI5B,EAAG,MAAM,kBAAa,EAAIA,EAAG,IAAI,YAAY,CAAC,CAC9D,CAEA,QAAQ,IAAIA,EAAG,IAAI;AAAA,CAA8C,CAAC,CACtE,CAEAD,EAAK,EAAE,MAAMqB,GAAK,CACd,QAAQ,MAAMpB,EAAG,IAAI,uBAAuBoB,EAAE,OAAO,EAAE,CAAC,EACxD,QAAQ,KAAK,CAAC,CAClB,CAAC","names":["glob","fs","path","pc","cheerio","TurndownService","turndownService","extractContent","html","sourceUrl","originalSize","$","title","description","jsonLdScripts","_","el","raw","parsed","contentHtml","markdown","finalMarkdown","j","markdownSize","tokenReductionRatio","generateLlmsTxt","config","lines","route","fullUrl","link","section","loadOntoConfig","config","path","loadEnv","envPath","fs","line","trimmedLine","key","valueParts","main","pc","cwd","nextAppDirDir","ontoPublicDir","files","glob","totalOriginalSize","totalMarkdownSize","totalFilesProcessed","file","inputPath","outputPathRelative","outputPath","htmlContent","result","extractContent","outputDir","origKb","mdKb","routeName","e","ONTO_API_KEY","DASHBOARD_URL","manifest","route","mdPath","res","errData","llmsTxtContent","generateLlmsTxt","llmsTxtPath","publicDir"]}
|
package/dist/config.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"use strict";var i=Object.defineProperty;var o=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var p=Object.prototype.hasOwnProperty;var a=(
|
|
1
|
+
"use strict";var i=Object.defineProperty;var o=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var p=Object.prototype.hasOwnProperty;var a=(s,t)=>{for(var e in t)i(s,e,{get:t[e],enumerable:!0})},h=(s,t,e,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of u(t))!p.call(s,n)&&n!==e&&i(s,n,{get:()=>t[n],enumerable:!(r=o(t,n))||r.enumerable});return s};var l=s=>h(i({},"__esModule",{value:!0}),s);var $={};a($,{generateLlmsTxt:()=>g});module.exports=l($);function g(s){let t=[];if(t.push(`# ${s.name}`),t.push(""),t.push(`> ${s.summary}`),t.push(""),s.routes&&s.routes.length>0){t.push("## Key Routes"),t.push("");for(let e of s.routes){let r=`${s.baseUrl}${e.path}`;t.push(`- [${e.path}](${r}): ${e.description}`)}t.push("")}if(s.externalLinks&&s.externalLinks.length>0){t.push("## Resources"),t.push("");for(let e of s.externalLinks)e.description?t.push(`- [${e.title}](${e.url}): ${e.description}`):t.push(`- [${e.title}](${e.url})`);t.push("")}if(s.sections&&s.sections.length>0)for(let e of s.sections)t.push(`## ${e.heading}`),t.push(""),t.push(e.content),t.push("");return t.join(`
|
|
2
2
|
`).trim()+`
|
|
3
|
-
`}0&&(module.exports={generateLlmsTxt
|
|
3
|
+
`}0&&(module.exports={generateLlmsTxt});
|
|
4
4
|
//# sourceMappingURL=config.js.map
|
package/dist/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/config.ts"],"sourcesContent":["/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n *
|
|
1
|
+
{"version":3,"sources":["../src/config.ts"],"sourcesContent":["/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,qBAAAE,IAAA,eAAAC,EAAAH,GAuFO,SAASE,EAAgBE,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC","names":["config_exports","__export","generateLlmsTxt","__toCommonJS","config","lines","route","fullUrl","link","section"]}
|
package/dist/config.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
function r(e){let t=[];if(t.push(`# ${e.name}`),t.push(""),t.push(`> ${e.summary}`),t.push(""),e.routes&&e.routes.length>0){t.push("## Key Routes"),t.push("");for(let s of e.routes){let n=`${e.baseUrl}${s.path}`;t.push(`- [${s.path}](${n}): ${s.description}`)}t.push("")}if(e.externalLinks&&e.externalLinks.length>0){t.push("## Resources"),t.push("");for(let s of e.externalLinks)s.description?t.push(`- [${s.title}](${s.url}): ${s.description}`):t.push(`- [${s.title}](${s.url})`);t.push("")}if(e.sections&&e.sections.length>0)for(let s of e.sections)t.push(`## ${s.heading}`),t.push(""),t.push(s.content),t.push("");return t.join(`
|
|
2
2
|
`).trim()+`
|
|
3
|
-
`}export{
|
|
3
|
+
`}export{r as generateLlmsTxt};
|
|
4
4
|
//# sourceMappingURL=config.mjs.map
|
package/dist/config.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/config.ts"],"sourcesContent":["/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n *
|
|
1
|
+
{"version":3,"sources":["../src/config.ts"],"sourcesContent":["/**\r\n * Configuration schema for onto.config.ts\r\n * Used to dynamically generate llms.txt and other AI discovery files\r\n */\r\n\r\nexport type PageType = 'scoring' | 'about' | 'default';\r\n\r\nexport interface OntoRoute {\r\n /**\r\n * The URL path (e.g., '/docs', '/api/reference')\r\n */\r\n path: string;\r\n /**\r\n * Description of what this route contains\r\n */\r\n description: string;\r\n /**\r\n * Optional: Page type for automatic JSON-LD schema injection\r\n * - 'scoring': Injects Methodology schema with AIO scoring weights (40/35/25)\r\n * - 'about': Injects Organization/AboutPage schema\r\n * - 'default': No automatic schema injection\r\n */\r\n pageType?: PageType;\r\n}\r\n\r\nexport interface OntoConfig {\r\n /**\r\n * The name of your project or site (required)\r\n * Used as the H1 heading in llms.txt\r\n */\r\n name: string;\r\n\r\n /**\r\n * A short summary of your project (required)\r\n * Displayed as a blockquote in llms.txt\r\n * Should contain key information necessary for understanding the rest of the file\r\n */\r\n summary: string;\r\n\r\n /**\r\n * The base URL of your site (e.g., 'https://example.com')\r\n */\r\n baseUrl: string;\r\n\r\n /**\r\n * Optional: Additional sections to include in llms.txt\r\n * Each section can contain any markdown content\r\n */\r\n sections?: {\r\n heading: string;\r\n content: string;\r\n }[];\r\n\r\n /**\r\n * Key routes that AI agents should know about\r\n * These will be formatted as a markdown list in llms.txt\r\n */\r\n routes?: OntoRoute[];\r\n\r\n /**\r\n * Optional: Links to external resources (documentation, API references, etc.)\r\n */\r\n externalLinks?: {\r\n title: string;\r\n url: string;\r\n description?: string;\r\n }[];\r\n\r\n /**\r\n * Optional: Organization information for JSON-LD schemas\r\n */\r\n organization?: {\r\n name: string;\r\n description?: string;\r\n url?: string;\r\n logo?: string;\r\n foundingDate?: string;\r\n };\r\n}\r\n\r\n/**\r\n * Generate llms.txt content from OntoConfig\r\n * Follows the llms.txt specification:\r\n * - H1 with project name\r\n * - Blockquote with summary\r\n * - Additional markdown sections\r\n */\r\nexport function generateLlmsTxt(config: OntoConfig): string {\r\n const lines: string[] = [];\r\n\r\n // H1: Project name (required)\r\n lines.push(`# ${config.name}`);\r\n lines.push('');\r\n\r\n // Blockquote: Summary (required)\r\n lines.push(`> ${config.summary}`);\r\n lines.push('');\r\n\r\n // Key Routes section (if provided)\r\n if (config.routes && config.routes.length > 0) {\r\n lines.push('## Key Routes');\r\n lines.push('');\r\n for (const route of config.routes) {\r\n const fullUrl = `${config.baseUrl}${route.path}`;\r\n lines.push(`- [${route.path}](${fullUrl}): ${route.description}`);\r\n }\r\n lines.push('');\r\n }\r\n\r\n // External Links section (if provided)\r\n if (config.externalLinks && config.externalLinks.length > 0) {\r\n lines.push('## Resources');\r\n lines.push('');\r\n for (const link of config.externalLinks) {\r\n if (link.description) {\r\n lines.push(`- [${link.title}](${link.url}): ${link.description}`);\r\n } else {\r\n lines.push(`- [${link.title}](${link.url})`);\r\n }\r\n }\r\n lines.push('');\r\n }\r\n\r\n // Custom sections (if provided)\r\n if (config.sections && config.sections.length > 0) {\r\n for (const section of config.sections) {\r\n lines.push(`## ${section.heading}`);\r\n lines.push('');\r\n lines.push(section.content);\r\n lines.push('');\r\n }\r\n }\r\n\r\n return lines.join('\\n').trim() + '\\n';\r\n}\r\n"],"mappings":"AAuFO,SAASA,EAAgBC,EAA4B,CAC1D,IAAMC,EAAkB,CAAC,EAWzB,GARAA,EAAM,KAAK,KAAKD,EAAO,IAAI,EAAE,EAC7BC,EAAM,KAAK,EAAE,EAGbA,EAAM,KAAK,KAAKD,EAAO,OAAO,EAAE,EAChCC,EAAM,KAAK,EAAE,EAGTD,EAAO,QAAUA,EAAO,OAAO,OAAS,EAAG,CAC7CC,EAAM,KAAK,eAAe,EAC1BA,EAAM,KAAK,EAAE,EACb,QAAWC,KAASF,EAAO,OAAQ,CACjC,IAAMG,EAAU,GAAGH,EAAO,OAAO,GAAGE,EAAM,IAAI,GAC9CD,EAAM,KAAK,MAAMC,EAAM,IAAI,KAAKC,CAAO,MAAMD,EAAM,WAAW,EAAE,CAClE,CACAD,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,eAAiBA,EAAO,cAAc,OAAS,EAAG,CAC3DC,EAAM,KAAK,cAAc,EACzBA,EAAM,KAAK,EAAE,EACb,QAAWG,KAAQJ,EAAO,cACpBI,EAAK,YACPH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,MAAMA,EAAK,WAAW,EAAE,EAEhEH,EAAM,KAAK,MAAMG,EAAK,KAAK,KAAKA,EAAK,GAAG,GAAG,EAG/CH,EAAM,KAAK,EAAE,CACf,CAGA,GAAID,EAAO,UAAYA,EAAO,SAAS,OAAS,EAC9C,QAAWK,KAAWL,EAAO,SAC3BC,EAAM,KAAK,MAAMI,EAAQ,OAAO,EAAE,EAClCJ,EAAM,KAAK,EAAE,EACbA,EAAM,KAAKI,EAAQ,OAAO,EAC1BJ,EAAM,KAAK,EAAE,EAIjB,OAAOA,EAAM,KAAK;AAAA,CAAI,EAAE,KAAK,EAAI;AAAA,CACnC","names":["generateLlmsTxt","config","lines","route","fullUrl","link","section"]}
|