nuxt-ai-ready 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -14
- package/dist/module.d.mts +13 -20
- package/dist/module.json +1 -1
- package/dist/module.mjs +369 -380
- package/dist/runtime/{llms-txt.d.ts → llms-txt-utils.d.ts} +2 -0
- package/dist/runtime/llms-txt-utils.js +114 -0
- package/dist/runtime/nuxt/plugins/{prerender.js → md-hints.prerender.js} +1 -1
- package/dist/runtime/server/mcp/resources/pages.d.ts +17 -0
- package/dist/runtime/server/mcp/{dev/resources → resources}/pages.js +5 -4
- package/dist/runtime/server/mcp/tools/list-pages.d.ts +16 -0
- package/dist/runtime/server/mcp/tools/list-pages.js +11 -0
- package/dist/runtime/server/mcp/tools/search-pages-fuzzy.d.ts +3 -0
- package/dist/runtime/server/mcp/tools/search-pages-fuzzy.js +25 -0
- package/dist/runtime/server/middleware/markdown.js +62 -0
- package/dist/runtime/server/middleware/markdown.prerender.d.ts +2 -0
- package/dist/runtime/server/middleware/markdown.prerender.js +35 -0
- package/dist/runtime/server/routes/llms-full.txt.get.d.ts +2 -0
- package/dist/runtime/server/routes/llms-full.txt.get.js +5 -0
- package/dist/runtime/server/routes/llms.txt.get.js +18 -18
- package/dist/runtime/server/utils/pageData.d.ts +25 -0
- package/dist/runtime/server/utils/pageData.js +25 -0
- package/dist/runtime/server/utils/sitemap.d.ts +6 -0
- package/dist/runtime/server/utils/sitemap.js +25 -0
- package/dist/runtime/server/utils.d.ts +15 -3
- package/dist/runtime/server/utils.js +92 -36
- package/dist/runtime/types.d.ts +25 -52
- package/dist/types.d.mts +1 -1
- package/package.json +16 -15
- package/dist/runtime/llms-txt.js +0 -35
- package/dist/runtime/server/mcp/dev/tools/list-pages.js +0 -10
- package/dist/runtime/server/mcp/dev/utils.js +0 -34
- package/dist/runtime/server/mcp/prod/resources/pages-chunks.js +0 -25
- package/dist/runtime/server/mcp/prod/resources/pages.js +0 -25
- package/dist/runtime/server/mcp/prod/tools/list-pages.js +0 -21
- package/dist/runtime/server/mcp/utils.d.ts +0 -3
- package/dist/runtime/server/mcp/utils.js +0 -7
- package/dist/runtime/server/middleware/mdream.js +0 -148
- package/dist/runtime/server/plugins/sitemap-lastmod.d.ts +0 -2
- package/dist/runtime/server/plugins/sitemap-lastmod.js +0 -22
- /package/dist/runtime/nuxt/plugins/{prerender.d.ts → md-hints.prerender.d.ts} +0 -0
- /package/dist/runtime/server/middleware/{mdream.d.ts → markdown.d.ts} +0 -0
package/dist/runtime/types.d.ts
CHANGED
|
@@ -65,40 +65,18 @@ export interface ModuleOptions {
|
|
|
65
65
|
mcp?: {
|
|
66
66
|
/** Enable MCP tools (list-pages) @default true */
|
|
67
67
|
tools?: boolean;
|
|
68
|
-
/** Enable MCP resources (pages
|
|
68
|
+
/** Enable MCP resources (pages) @default true */
|
|
69
69
|
resources?: boolean;
|
|
70
70
|
};
|
|
71
71
|
/**
|
|
72
|
-
*
|
|
72
|
+
* Cache duration for llms.txt in seconds (runtime generation)
|
|
73
|
+
* Set to 0 to disable caching
|
|
74
|
+
* @default 600 (10 minutes)
|
|
73
75
|
*/
|
|
74
|
-
|
|
75
|
-
/**
|
|
76
|
-
* Enable timestamp tracking
|
|
77
|
-
* @default false
|
|
78
|
-
*/
|
|
79
|
-
enabled?: boolean;
|
|
80
|
-
/**
|
|
81
|
-
* Path to store content hash manifest
|
|
82
|
-
* @default 'node_modules/.cache/nuxt-seo/ai-index/content-hashes.json'
|
|
83
|
-
*/
|
|
84
|
-
manifestPath?: string;
|
|
85
|
-
};
|
|
86
|
-
}
|
|
87
|
-
/**
|
|
88
|
-
* Individual chunk entry in llms-full.toon (one per chunk)
|
|
89
|
-
* Used for RAG, embeddings, and semantic search
|
|
90
|
-
* Optimized for token efficiency - join with llms.toon for title/description
|
|
91
|
-
* Chunk index can be inferred from id suffix (e.g., "hash-0", "hash-1")
|
|
92
|
-
* Tabular TOON format (primitives only)
|
|
93
|
-
*/
|
|
94
|
-
export interface BulkChunk {
|
|
95
|
-
id: string;
|
|
96
|
-
route: string;
|
|
97
|
-
content: string;
|
|
76
|
+
cacheMaxAgeSeconds?: number;
|
|
98
77
|
}
|
|
99
78
|
/**
|
|
100
|
-
* Page-level entry
|
|
101
|
-
* Used for page discovery, listing, and metadata queries
|
|
79
|
+
* Page-level entry for discovery and metadata queries
|
|
102
80
|
*/
|
|
103
81
|
export interface BulkDocument {
|
|
104
82
|
/** Page route/path */
|
|
@@ -107,12 +85,10 @@ export interface BulkDocument {
|
|
|
107
85
|
title: string;
|
|
108
86
|
/** Page description */
|
|
109
87
|
description: string;
|
|
110
|
-
/** Full markdown content
|
|
88
|
+
/** Full markdown content */
|
|
111
89
|
markdown: string;
|
|
112
90
|
/** Page headings structure (e.g., [{ "h1": "Title" }, { "h2": "Subtitle" }]) */
|
|
113
91
|
headings: Array<Record<string, string>>;
|
|
114
|
-
/** All chunk IDs for this page (first ID can be used as document ID) */
|
|
115
|
-
chunkIds: string[];
|
|
116
92
|
/** ISO 8601 timestamp of last content update */
|
|
117
93
|
updatedAt?: string;
|
|
118
94
|
}
|
|
@@ -123,13 +99,13 @@ export interface BulkDocument {
|
|
|
123
99
|
* You can modify the markdown content before it's returned to the client.
|
|
124
100
|
*
|
|
125
101
|
* @example Modify markdown content
|
|
126
|
-
* nitroApp.hooks.hook('
|
|
102
|
+
* nitroApp.hooks.hook('ai-ready:markdown', async (context) => {
|
|
127
103
|
* // Add a footer to all markdown
|
|
128
104
|
* context.markdown += '\n\n---\n*Generated with mdream*'
|
|
129
105
|
* })
|
|
130
106
|
*
|
|
131
107
|
* @example Track conversions and add headers
|
|
132
|
-
* nitroApp.hooks.hook('
|
|
108
|
+
* nitroApp.hooks.hook('ai-ready:markdown', async (context) => {
|
|
133
109
|
* console.log(`Converted ${context.route} (${context.title})`)
|
|
134
110
|
*
|
|
135
111
|
* // Add custom headers
|
|
@@ -186,33 +162,30 @@ export interface LlmsTxtConfig {
|
|
|
186
162
|
notes?: string | string[];
|
|
187
163
|
}
|
|
188
164
|
/**
|
|
189
|
-
* Hook context for
|
|
165
|
+
* Hook context for page markdown processing (Nuxt build-time hook)
|
|
190
166
|
*
|
|
191
|
-
* Called during prerender
|
|
192
|
-
* to
|
|
167
|
+
* Called once per page during prerender when markdown content has changed.
|
|
168
|
+
* Allows integrations to process page content (e.g., for embedding generation).
|
|
193
169
|
*
|
|
194
|
-
* @example Process
|
|
195
|
-
*
|
|
196
|
-
*
|
|
197
|
-
*
|
|
198
|
-
* id: context.chunk.id,
|
|
199
|
-
* embedding,
|
|
200
|
-
* metadata: {
|
|
201
|
-
* route: context.route,
|
|
202
|
-
* title: context.title,
|
|
203
|
-
* }
|
|
204
|
-
* })
|
|
170
|
+
* @example Process page markdown
|
|
171
|
+
* nuxt.hooks.hook('ai-ready:page:markdown', async (context) => {
|
|
172
|
+
* console.log(`Processing ${context.route}: ${context.title}`)
|
|
173
|
+
* // Generate embeddings, update search index, etc.
|
|
205
174
|
* })
|
|
206
175
|
*/
|
|
207
|
-
export interface
|
|
208
|
-
/** The chunk data that will be written to bulk JSONL */
|
|
209
|
-
chunk: BulkChunk;
|
|
176
|
+
export interface PageMarkdownContext {
|
|
210
177
|
/** The route being processed (e.g., '/about') */
|
|
211
178
|
route: string;
|
|
179
|
+
/** The markdown content */
|
|
180
|
+
markdown: string;
|
|
212
181
|
/** Page title extracted from HTML */
|
|
213
182
|
title: string;
|
|
214
183
|
/** Page description from meta tags */
|
|
215
184
|
description: string;
|
|
216
|
-
|
|
217
|
-
|
|
185
|
+
}
|
|
186
|
+
declare module 'nitropack/types' {
|
|
187
|
+
interface NitroRuntimeHooks {
|
|
188
|
+
'ai-ready:markdown': (context: MarkdownContext) => void | Promise<void>;
|
|
189
|
+
'ai-ready:mdreamConfig': (config: import('mdream').HTMLToMarkdownOptions) => void | Promise<void>;
|
|
190
|
+
}
|
|
218
191
|
}
|
package/dist/types.d.mts
CHANGED
|
@@ -5,7 +5,7 @@ declare module '@nuxt/schema' {
|
|
|
5
5
|
interface PublicRuntimeConfig extends ModulePublicRuntimeConfig {}
|
|
6
6
|
}
|
|
7
7
|
|
|
8
|
-
export { type
|
|
8
|
+
export { type ModuleOptions } from '../dist/runtime/types.js'
|
|
9
9
|
|
|
10
10
|
export { default } from './module.mjs'
|
|
11
11
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nuxt-ai-ready",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.3.8",
|
|
4
|
+
"version": "0.4.0",
|
|
5
5
|
"description": "Best practice AI & LLM discoverability for Nuxt sites.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -31,25 +31,26 @@
|
|
|
31
31
|
"files": [
|
|
32
32
|
"dist"
|
|
33
33
|
],
|
|
34
|
+
"peerDependencies": {
|
|
35
|
+
"@nuxtjs/sitemap": "^7.5.0"
|
|
36
|
+
},
|
|
34
37
|
"dependencies": {
|
|
35
38
|
"@clack/prompts": "^0.11.0",
|
|
36
39
|
"@nuxt/kit": "4.2.2",
|
|
37
|
-
"@toon-format/toon": "^2.1.0",
|
|
38
40
|
"consola": "^3.4.2",
|
|
39
41
|
"defu": "^6.1.4",
|
|
42
|
+
"fuse.js": "^7.1.0",
|
|
40
43
|
"mdream": "^0.15.3",
|
|
41
|
-
"
|
|
42
|
-
"nuxt-site-config": "3.2.11",
|
|
44
|
+
"nuxt-site-config": "3.2.14",
|
|
43
45
|
"ofetch": "^1.5.1",
|
|
44
46
|
"pathe": "^2.0.3",
|
|
45
47
|
"pkg-types": "^2.3.0",
|
|
46
48
|
"std-env": "3.10.0",
|
|
47
|
-
"tokenx": "^1.2.1",
|
|
48
49
|
"ufo": "^1.6.1",
|
|
49
50
|
"unstorage": "^1.17.3"
|
|
50
51
|
},
|
|
51
52
|
"devDependencies": {
|
|
52
|
-
"@antfu/eslint-config": "^6.
|
|
53
|
+
"@antfu/eslint-config": "^6.7.1",
|
|
53
54
|
"@arethetypeswrong/cli": "^0.18.2",
|
|
54
55
|
"@headlessui/vue": "^1.7.23",
|
|
55
56
|
"@nuxt/content": "^3.9.0",
|
|
@@ -60,29 +61,29 @@
|
|
|
60
61
|
"@nuxtjs/eslint-config-typescript": "^12.1.0",
|
|
61
62
|
"@nuxtjs/i18n": "^10.2.1",
|
|
62
63
|
"@nuxtjs/mcp-toolkit": "^0.5.2",
|
|
63
|
-
"@nuxtjs/robots": "^5.6.
|
|
64
|
-
"@nuxtjs/sitemap": "
|
|
65
|
-
"@vitest/coverage-v8": "^4.0.
|
|
64
|
+
"@nuxtjs/robots": "^5.6.7",
|
|
65
|
+
"@nuxtjs/sitemap": "7.5.0",
|
|
66
|
+
"@vitest/coverage-v8": "^4.0.16",
|
|
66
67
|
"@vueuse/nuxt": "^14.1.0",
|
|
67
68
|
"better-sqlite3": "^12.5.0",
|
|
68
69
|
"bumpp": "^10.3.2",
|
|
69
|
-
"eslint": "^9.39.
|
|
70
|
+
"eslint": "^9.39.2",
|
|
70
71
|
"execa": "^9.6.1",
|
|
71
72
|
"happy-dom": "^20.0.11",
|
|
72
73
|
"nuxt": "^4.2.2",
|
|
73
|
-
"nuxt-site-config": "3.2.
|
|
74
|
+
"nuxt-site-config": "3.2.14",
|
|
74
75
|
"playwright": "^1.57.0",
|
|
75
76
|
"playwright-core": "^1.57.0",
|
|
76
77
|
"postgres": "^3.4.7",
|
|
77
78
|
"typescript": "^5.9.3",
|
|
78
|
-
"vitest": "^4.0.
|
|
79
|
+
"vitest": "^4.0.16",
|
|
79
80
|
"vue": "^3.5.25",
|
|
80
|
-
"vue-router": "^4.6.
|
|
81
|
+
"vue-router": "^4.6.4",
|
|
81
82
|
"vue-tsc": "^3.1.8",
|
|
82
|
-
"zod": "^4.1
|
|
83
|
+
"zod": "^4.2.1"
|
|
83
84
|
},
|
|
84
85
|
"resolutions": {
|
|
85
|
-
"nuxt-ai-ready": "
|
|
86
|
+
"nuxt-ai-ready": "workspace:*"
|
|
86
87
|
},
|
|
87
88
|
"scripts": {
|
|
88
89
|
"lint": "eslint . --fix",
|
package/dist/runtime/llms-txt.js
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
function normalizeLink(link) {
|
|
2
|
-
const parts = [];
|
|
3
|
-
parts.push(`- [${link.title}](${link.href})`);
|
|
4
|
-
if (link.description) {
|
|
5
|
-
parts.push(` ${link.description}`);
|
|
6
|
-
}
|
|
7
|
-
return parts.join("\n");
|
|
8
|
-
}
|
|
9
|
-
function normalizeSection(section) {
|
|
10
|
-
const parts = [];
|
|
11
|
-
parts.push(`## ${section.title}`);
|
|
12
|
-
parts.push("");
|
|
13
|
-
if (section.description) {
|
|
14
|
-
const descriptions = Array.isArray(section.description) ? section.description : [section.description];
|
|
15
|
-
parts.push(...descriptions);
|
|
16
|
-
parts.push("");
|
|
17
|
-
}
|
|
18
|
-
if (section.links?.length) {
|
|
19
|
-
parts.push(...section.links.map(normalizeLink));
|
|
20
|
-
}
|
|
21
|
-
return parts.join("\n");
|
|
22
|
-
}
|
|
23
|
-
export function normalizeLlmsTxtConfig(config) {
|
|
24
|
-
const parts = [];
|
|
25
|
-
if (config.sections?.length) {
|
|
26
|
-
parts.push(...config.sections.map(normalizeSection));
|
|
27
|
-
}
|
|
28
|
-
if (config.notes) {
|
|
29
|
-
parts.push("## Notes");
|
|
30
|
-
parts.push("");
|
|
31
|
-
const notes = Array.isArray(config.notes) ? config.notes : [config.notes];
|
|
32
|
-
parts.push(...notes);
|
|
33
|
-
}
|
|
34
|
-
return parts.join("\n\n");
|
|
35
|
-
}
|
|
package/dist/runtime/server/mcp/dev/tools/list-pages.js
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
import { getDevPages, jsonResult } from "../utils.js";
|
|
2
|
-
export default {
|
|
3
|
-
name: "list_pages",
|
|
4
|
-
description: "Lists all available pages with their routes. In dev mode, returns JSON from sitemap/routes (TOON format unavailable until build).",
|
|
5
|
-
inputSchema: {},
|
|
6
|
-
async handler() {
|
|
7
|
-
const pages = await getDevPages();
|
|
8
|
-
return jsonResult(pages);
|
|
9
|
-
}
|
|
10
|
-
};
|
|
package/dist/runtime/server/mcp/dev/utils.js
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import routes from "#ai-ready/routes.mjs";
|
|
2
|
-
import { useRuntimeConfig } from "nitropack/runtime";
|
|
3
|
-
export { jsonResult } from "../utils.js";
|
|
4
|
-
function routeToRegex(routePath) {
|
|
5
|
-
const pattern = routePath.replace(/:[^/]+\(\.\*\)\*?/g, ".*").replace(/:[^/]+/g, "[^/]+");
|
|
6
|
-
return new RegExp(`^${pattern}$`);
|
|
7
|
-
}
|
|
8
|
-
function matchRoute(path, routeRecords) {
|
|
9
|
-
for (const r of routeRecords) {
|
|
10
|
-
if (routeToRegex(r.path).test(path))
|
|
11
|
-
return r;
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
export async function getDevPages() {
|
|
15
|
-
const config = useRuntimeConfig()["nuxt-ai-ready"];
|
|
16
|
-
if (!config.hasSitemap)
|
|
17
|
-
return routes.map((r) => ({ route: r.path, name: r.name, meta: r.meta }));
|
|
18
|
-
const { parseSitemapXml } = await import("@nuxtjs/sitemap/utils");
|
|
19
|
-
const sitemapRes = await fetch("/sitemap.xml");
|
|
20
|
-
if (!sitemapRes.ok)
|
|
21
|
-
return routes.map((r) => ({ route: r.path, name: r.name, meta: r.meta }));
|
|
22
|
-
const xml = await sitemapRes.text();
|
|
23
|
-
const { urls } = await parseSitemapXml(xml);
|
|
24
|
-
return urls.map((entry) => {
|
|
25
|
-
const pathname = typeof entry === "string" ? new URL(entry).pathname : new URL(entry.loc).pathname;
|
|
26
|
-
const matched = matchRoute(pathname, routes);
|
|
27
|
-
return {
|
|
28
|
-
route: pathname,
|
|
29
|
-
...typeof entry !== "string" && entry.lastmod && { lastmod: entry.lastmod },
|
|
30
|
-
...matched?.name && { name: matched.name },
|
|
31
|
-
...matched?.meta && { meta: matched.meta }
|
|
32
|
-
};
|
|
33
|
-
});
|
|
34
|
-
}
|
|
package/dist/runtime/server/mcp/prod/resources/pages-chunks.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import { getNitroOrigin } from "#site-config/server/composables";
|
|
2
|
-
import { useEvent } from "nitropack/runtime";
|
|
3
|
-
export default {
|
|
4
|
-
uri: "resource://nuxt-ai-ready/pages-chunks",
|
|
5
|
-
name: "All Page Chunks",
|
|
6
|
-
description: "Chunk-level content (id, route, content) in TOON format for RAG/embeddings. Join with pages resource using id field - match chunk.id with page.chunkIds[] to get title, description, headings. TOON is token-efficient JSON encoding (see https://toonformat.dev)",
|
|
7
|
-
metadata: {
|
|
8
|
-
mimeType: "text/toon"
|
|
9
|
-
},
|
|
10
|
-
cache: "1h",
|
|
11
|
-
async handler(uri) {
|
|
12
|
-
const event = useEvent();
|
|
13
|
-
const nitroOrigin = getNitroOrigin(event);
|
|
14
|
-
const text = await $fetch(`/llms-full.toon`, {
|
|
15
|
-
baseURL: nitroOrigin
|
|
16
|
-
});
|
|
17
|
-
return {
|
|
18
|
-
contents: [{
|
|
19
|
-
uri: uri.toString(),
|
|
20
|
-
mimeType: "text/toon",
|
|
21
|
-
text
|
|
22
|
-
}]
|
|
23
|
-
};
|
|
24
|
-
}
|
|
25
|
-
};
|
|
package/dist/runtime/server/mcp/prod/resources/pages.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import { getNitroOrigin } from "#site-config/server/composables";
|
|
2
|
-
import { useEvent } from "nitropack/runtime";
|
|
3
|
-
export default {
|
|
4
|
-
uri: "resource://nuxt-ai-ready/pages",
|
|
5
|
-
name: "All Pages",
|
|
6
|
-
description: "Page-level metadata (route, title, description, headings, chunkIds, updatedAt) in TOON format. Each page includes chunkIds to join with pages-chunks resource for chunk-level content. TOON is token-efficient JSON encoding (see https://toonformat.dev)",
|
|
7
|
-
metadata: {
|
|
8
|
-
mimeType: "text/toon"
|
|
9
|
-
},
|
|
10
|
-
cache: "1h",
|
|
11
|
-
async handler(uri) {
|
|
12
|
-
const event = useEvent();
|
|
13
|
-
const nitroOrigin = getNitroOrigin(event);
|
|
14
|
-
const text = await $fetch(`/llms.toon`, {
|
|
15
|
-
baseURL: nitroOrigin
|
|
16
|
-
});
|
|
17
|
-
return {
|
|
18
|
-
contents: [{
|
|
19
|
-
uri: uri.toString(),
|
|
20
|
-
mimeType: "text/toon",
|
|
21
|
-
text
|
|
22
|
-
}]
|
|
23
|
-
};
|
|
24
|
-
}
|
|
25
|
-
};
|
|
package/dist/runtime/server/mcp/prod/tools/list-pages.js
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { getNitroOrigin } from "#site-config/server/composables";
|
|
2
|
-
import { useEvent } from "nitropack/runtime";
|
|
3
|
-
import { z } from "zod";
|
|
4
|
-
import { toonResult } from "../../utils.js";
|
|
5
|
-
const schema = {
|
|
6
|
-
mode: z.enum(["chunks", "minimal"]).default("minimal").describe("Return individual content chunks (chunks) or page-level metadata (minimal)")
|
|
7
|
-
};
|
|
8
|
-
export default {
|
|
9
|
-
name: "list_pages",
|
|
10
|
-
description: 'Lists all available pages in TOON format (token-efficient). Use "chunks" mode to get individual content chunks, or "minimal" for page-level metadata.',
|
|
11
|
-
inputSchema: schema,
|
|
12
|
-
cache: "1h",
|
|
13
|
-
async handler({ mode }) {
|
|
14
|
-
const event = useEvent();
|
|
15
|
-
const nitroOrigin = getNitroOrigin(event);
|
|
16
|
-
const text = await $fetch(mode === "chunks" ? "/llms-full.toon" : "/llms.toon", {
|
|
17
|
-
baseURL: nitroOrigin
|
|
18
|
-
});
|
|
19
|
-
return toonResult(text);
|
|
20
|
-
}
|
|
21
|
-
};
|
|
package/dist/runtime/server/mcp/utils.js
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
export function jsonResult(data, pretty = true) {
|
|
2
|
-
const text = pretty ? JSON.stringify(data, null, 2) : JSON.stringify(data);
|
|
3
|
-
return { content: [{ type: "text", text }] };
|
|
4
|
-
}
|
|
5
|
-
export function toonResult(toon) {
|
|
6
|
-
return { content: [{ type: "text", text: toon }] };
|
|
7
|
-
}
|
|
package/dist/runtime/server/middleware/mdream.js
DELETED
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
import { withSiteUrl } from "#site-config/server/composables/utils";
|
|
2
|
-
import { createError, defineEventHandler, getHeader, setHeader } from "h3";
|
|
3
|
-
import { htmlToMarkdown } from "mdream";
|
|
4
|
-
import { extractionPlugin } from "mdream/plugins";
|
|
5
|
-
import { withMinimalPreset } from "mdream/preset/minimal";
|
|
6
|
-
import { useNitroApp, useRuntimeConfig } from "nitropack/runtime";
|
|
7
|
-
import { logger } from "../logger.js";
|
|
8
|
-
import { convertHtmlToMarkdownChunks } from "../utils.js";
|
|
9
|
-
function normalizeWhitespace(text) {
|
|
10
|
-
return text.replace(/\u00A0/g, " ");
|
|
11
|
-
}
|
|
12
|
-
function shouldServeMarkdown(event) {
|
|
13
|
-
const accept = getHeader(event, "accept") || "";
|
|
14
|
-
const secFetchDest = getHeader(event, "sec-fetch-dest") || "";
|
|
15
|
-
if (secFetchDest === "document") {
|
|
16
|
-
return false;
|
|
17
|
-
}
|
|
18
|
-
if (accept.includes("text/html")) {
|
|
19
|
-
return false;
|
|
20
|
-
}
|
|
21
|
-
return accept.includes("*/*") || accept.includes("text/markdown");
|
|
22
|
-
}
|
|
23
|
-
async function convertHtmlToMarkdown(html, url, config, route, event) {
|
|
24
|
-
const nitroApp = useNitroApp();
|
|
25
|
-
let title = "";
|
|
26
|
-
let description = "";
|
|
27
|
-
const headings = [];
|
|
28
|
-
const extractPlugin = extractionPlugin({
|
|
29
|
-
title(el) {
|
|
30
|
-
title = el.textContent;
|
|
31
|
-
},
|
|
32
|
-
'meta[name="description"]': (el) => {
|
|
33
|
-
description = el.attributes.content || "";
|
|
34
|
-
},
|
|
35
|
-
"h1, h2, h3, h4, h5, h6": (el) => {
|
|
36
|
-
const text = el.textContent?.trim();
|
|
37
|
-
const level = el.name.toLowerCase();
|
|
38
|
-
if (text)
|
|
39
|
-
headings.push({ [level]: text });
|
|
40
|
-
}
|
|
41
|
-
});
|
|
42
|
-
let options = {
|
|
43
|
-
origin: url,
|
|
44
|
-
...config.mdreamOptions
|
|
45
|
-
};
|
|
46
|
-
if (config.mdreamOptions?.preset === "minimal") {
|
|
47
|
-
options = withMinimalPreset(options);
|
|
48
|
-
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
49
|
-
} else {
|
|
50
|
-
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
51
|
-
}
|
|
52
|
-
await nitroApp.hooks.callHook("ai-ready:mdreamConfig", options);
|
|
53
|
-
let markdown = htmlToMarkdown(html, options);
|
|
54
|
-
const context = {
|
|
55
|
-
html,
|
|
56
|
-
markdown,
|
|
57
|
-
route,
|
|
58
|
-
title,
|
|
59
|
-
description,
|
|
60
|
-
isPrerender: Boolean(import.meta.prerender),
|
|
61
|
-
event
|
|
62
|
-
};
|
|
63
|
-
await nitroApp.hooks.callHook("ai-ready:markdown", context);
|
|
64
|
-
markdown = normalizeWhitespace(context.markdown);
|
|
65
|
-
return { markdown, title: normalizeWhitespace(title), description: normalizeWhitespace(description), headings };
|
|
66
|
-
}
|
|
67
|
-
export default defineEventHandler(async (event) => {
|
|
68
|
-
let path = event.path;
|
|
69
|
-
const config = useRuntimeConfig(event)["nuxt-ai-ready"];
|
|
70
|
-
if (path.startsWith("/api") || path.startsWith("/_") || path.startsWith("/@")) {
|
|
71
|
-
return;
|
|
72
|
-
}
|
|
73
|
-
const lastSegment = path.split("/").pop() || "";
|
|
74
|
-
const hasExtension = lastSegment.includes(".");
|
|
75
|
-
const extension = hasExtension ? lastSegment.substring(lastSegment.lastIndexOf(".")) : "";
|
|
76
|
-
if (hasExtension && extension !== ".md") {
|
|
77
|
-
return;
|
|
78
|
-
}
|
|
79
|
-
const hasMarkdownExtension = path.endsWith(".md");
|
|
80
|
-
const clientPrefersMarkdown = shouldServeMarkdown(event);
|
|
81
|
-
if (!hasMarkdownExtension && !clientPrefersMarkdown) {
|
|
82
|
-
return;
|
|
83
|
-
}
|
|
84
|
-
if (hasMarkdownExtension) {
|
|
85
|
-
path = path.slice(0, -3);
|
|
86
|
-
}
|
|
87
|
-
if (path === "/index") {
|
|
88
|
-
path = "/";
|
|
89
|
-
}
|
|
90
|
-
let html;
|
|
91
|
-
try {
|
|
92
|
-
const response = await event.fetch(path);
|
|
93
|
-
if (!response.ok) {
|
|
94
|
-
if (hasMarkdownExtension) {
|
|
95
|
-
return createError({
|
|
96
|
-
statusCode: response.status,
|
|
97
|
-
statusMessage: response.statusText,
|
|
98
|
-
message: `Failed to fetch HTML for ${path}`
|
|
99
|
-
});
|
|
100
|
-
}
|
|
101
|
-
return;
|
|
102
|
-
}
|
|
103
|
-
const contentType = response.headers.get("content-type") || "";
|
|
104
|
-
if (!contentType.includes("text/html")) {
|
|
105
|
-
if (hasMarkdownExtension) {
|
|
106
|
-
return createError({
|
|
107
|
-
statusCode: 415,
|
|
108
|
-
statusMessage: "Unsupported Media Type",
|
|
109
|
-
message: `Expected text/html but got ${contentType} for ${path}`
|
|
110
|
-
});
|
|
111
|
-
}
|
|
112
|
-
return;
|
|
113
|
-
}
|
|
114
|
-
html = await response.text();
|
|
115
|
-
} catch (e) {
|
|
116
|
-
logger.error(`Failed to fetch HTML for ${path}`, e);
|
|
117
|
-
if (hasMarkdownExtension) {
|
|
118
|
-
return createError({
|
|
119
|
-
statusCode: 500,
|
|
120
|
-
statusMessage: "Internal Server Error",
|
|
121
|
-
message: `Failed to fetch HTML for ${path}`
|
|
122
|
-
});
|
|
123
|
-
}
|
|
124
|
-
return;
|
|
125
|
-
}
|
|
126
|
-
if (import.meta.prerender) {
|
|
127
|
-
const result2 = convertHtmlToMarkdownChunks(
|
|
128
|
-
html,
|
|
129
|
-
withSiteUrl(event, path),
|
|
130
|
-
config.mdreamOptions
|
|
131
|
-
);
|
|
132
|
-
return JSON.stringify(result2);
|
|
133
|
-
}
|
|
134
|
-
const result = await convertHtmlToMarkdown(
|
|
135
|
-
html,
|
|
136
|
-
withSiteUrl(event, path),
|
|
137
|
-
config,
|
|
138
|
-
path,
|
|
139
|
-
event
|
|
140
|
-
);
|
|
141
|
-
setHeader(event, "content-type", "text/markdown; charset=utf-8");
|
|
142
|
-
if (config.markdownCacheHeaders) {
|
|
143
|
-
const { maxAge, swr } = config.markdownCacheHeaders;
|
|
144
|
-
const cacheControl = swr ? `public, max-age=${maxAge}, stale-while-revalidate=${maxAge}` : `public, max-age=${maxAge}`;
|
|
145
|
-
setHeader(event, "cache-control", cacheControl);
|
|
146
|
-
}
|
|
147
|
-
return result.markdown;
|
|
148
|
-
});
|
|
package/dist/runtime/server/plugins/sitemap-lastmod.js
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { readFile } from "node:fs/promises";
|
|
2
|
-
import { defineNitroPlugin, useRuntimeConfig } from "nitropack/runtime";
|
|
3
|
-
export default defineNitroPlugin((nitroApp) => {
|
|
4
|
-
const config = useRuntimeConfig();
|
|
5
|
-
const manifestPath = config["nuxt-ai-ready"]?.timestampsManifestPath;
|
|
6
|
-
if (!manifestPath) {
|
|
7
|
-
return;
|
|
8
|
-
}
|
|
9
|
-
nitroApp.hooks.hook("sitemap:resolved", async (ctx) => {
|
|
10
|
-
const manifest = await readFile(manifestPath, "utf-8").then((data) => JSON.parse(data)).catch(() => null);
|
|
11
|
-
if (!manifest) {
|
|
12
|
-
return;
|
|
13
|
-
}
|
|
14
|
-
for (const url of ctx.urls) {
|
|
15
|
-
const route = url.loc.replace(/^https?:\/\/[^/]+/, "").replace(/\/$/, "") || "/";
|
|
16
|
-
const pageData = manifest.pages[route];
|
|
17
|
-
if (pageData?.updatedAt) {
|
|
18
|
-
url.lastmod = pageData.updatedAt;
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
});
|
|
22
|
-
});
|
|
File without changes
|
|
File without changes
|