nuxt-ai-ready 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/module.d.mts +6 -29
- package/dist/module.json +1 -1
- package/dist/module.mjs +108 -172
- package/dist/runtime/server/middleware/mdream.js +64 -6
- package/package.json +2 -2
package/dist/module.d.mts
CHANGED
|
@@ -1,36 +1,8 @@
|
|
|
1
1
|
import * as _nuxt_schema from '@nuxt/schema';
|
|
2
|
-
import {
|
|
2
|
+
import { BulkChunk, ModuleOptions } from '../dist/runtime/types.js';
|
|
3
3
|
export { BulkChunk, ModuleOptions } from '../dist/runtime/types.js';
|
|
4
|
-
import { ProcessedFile } from 'mdream/llms-txt';
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Hook payload for mdream:llms-txt
|
|
8
|
-
* Called after mdream has generated llms.txt, before writing to disk
|
|
9
|
-
*
|
|
10
|
-
* IMPORTANT: This uses a mutable pattern. Hooks should modify the content
|
|
11
|
-
* and fullContent properties directly rather than returning values.
|
|
12
|
-
*
|
|
13
|
-
* @example
|
|
14
|
-
* nuxt.hooks.hook('mdream:llms-txt', async (payload) => {
|
|
15
|
-
* payload.content += '\n\n## Custom Section\n\nAdded by hook!'
|
|
16
|
-
* payload.fullContent += '\n\n## Custom Section (Full)\n\nAdded by hook!'
|
|
17
|
-
* })
|
|
18
|
-
*/
|
|
19
|
-
interface LlmsTxtGeneratePayload {
|
|
20
|
-
/** Current llms.txt content - modify this directly */
|
|
21
|
-
content: string;
|
|
22
|
-
/** Current llms-full.txt content - modify this directly */
|
|
23
|
-
fullContent: string;
|
|
24
|
-
/** All routes with their metadata (read-only) */
|
|
25
|
-
pages: ProcessedFile[];
|
|
26
|
-
}
|
|
27
4
|
|
|
28
5
|
interface ModuleHooks {
|
|
29
|
-
/**
|
|
30
|
-
* Hook to modify llms.txt content before final output
|
|
31
|
-
* Other modules can append their own API endpoints here
|
|
32
|
-
*/
|
|
33
|
-
'ai-ready:llms-txt': (payload: LlmsTxtGeneratePayload) => void | Promise<void>;
|
|
34
6
|
/**
|
|
35
7
|
* Hook to add routes to the AI ready
|
|
36
8
|
* Other modules can register their own API routes
|
|
@@ -58,5 +30,10 @@ interface ModulePublicRuntimeConfig {
|
|
|
58
30
|
}
|
|
59
31
|
declare const _default: _nuxt_schema.NuxtModule<ModuleOptions, ModuleOptions, false>;
|
|
60
32
|
|
|
33
|
+
declare module '@nuxt/schema' {
|
|
34
|
+
interface NuxtHooks extends ModuleHooks {
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
61
38
|
export { _default as default };
|
|
62
39
|
export type { ModuleHooks, ModulePublicRuntimeConfig };
|
package/dist/module.json
CHANGED
package/dist/module.mjs
CHANGED
|
@@ -1,109 +1,140 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto';
|
|
2
|
-
import { mkdirSync, createWriteStream } from 'node:fs';
|
|
3
1
|
import { useLogger, useNuxt, defineNuxtModule, createResolver, addTypeTemplate, hasNuxtModule, addServerHandler, addPlugin } from '@nuxt/kit';
|
|
4
2
|
import defu from 'defu';
|
|
5
|
-
import { TagIdMap } from 'mdream';
|
|
6
|
-
import { extractionPlugin } from 'mdream/plugins';
|
|
7
|
-
import { htmlToMarkdownSplitChunksStream } from 'mdream/splitter';
|
|
8
3
|
import { useSiteConfig, installNuxtSiteConfig, withSiteUrl } from 'nuxt-site-config/kit';
|
|
9
|
-
import { relative
|
|
4
|
+
import { relative as relative$1 } from 'pathe';
|
|
10
5
|
import { readPackageJSON } from 'pkg-types';
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
6
|
+
import { createHash } from 'node:crypto';
|
|
7
|
+
import { mkdirSync, createWriteStream } from 'node:fs';
|
|
8
|
+
import { stat } from 'node:fs/promises';
|
|
9
|
+
import { join, dirname, relative } from 'node:path';
|
|
10
|
+
import { createLlmsTxtStream } from 'mdream/llms-txt';
|
|
16
11
|
|
|
17
12
|
const logger = useLogger("nuxt-ai-ready");
|
|
18
13
|
|
|
14
|
+
function generateVectorId(route, chunkIdx) {
|
|
15
|
+
const hash = createHash("sha256").update(route).digest("hex").substring(0, 48);
|
|
16
|
+
return `${hash}-${chunkIdx}`;
|
|
17
|
+
}
|
|
19
18
|
function setupPrerenderHandler() {
|
|
20
19
|
const nuxt = useNuxt();
|
|
21
|
-
const pages = [];
|
|
22
20
|
nuxt.hooks.hook("nitro:init", async (nitro) => {
|
|
21
|
+
let writer = null;
|
|
22
|
+
let bulkStream = null;
|
|
23
|
+
let bulkStreamEntries = 0;
|
|
24
|
+
let pageCount = 0;
|
|
25
|
+
const startTime = Date.now();
|
|
26
|
+
const bulkPath = join(nitro.options.output.publicDir, "content.jsonl");
|
|
23
27
|
nitro.hooks.hook("prerender:generate", async (route) => {
|
|
24
28
|
if (!route.fileName?.endsWith(".md")) {
|
|
25
29
|
return;
|
|
26
30
|
}
|
|
27
|
-
|
|
28
|
-
|
|
31
|
+
let pageRoute = route.route.replace(/\.md$/, "");
|
|
32
|
+
if (pageRoute === "/index") {
|
|
33
|
+
pageRoute = "/";
|
|
34
|
+
}
|
|
35
|
+
if (!writer) {
|
|
36
|
+
const siteConfig = useSiteConfig();
|
|
37
|
+
const stream = createLlmsTxtStream({
|
|
38
|
+
siteName: siteConfig.name || siteConfig.url,
|
|
39
|
+
description: siteConfig.description,
|
|
40
|
+
origin: siteConfig.url,
|
|
41
|
+
generateFull: true,
|
|
42
|
+
outputDir: nitro.options.output.publicDir
|
|
43
|
+
});
|
|
44
|
+
writer = stream.getWriter();
|
|
45
|
+
}
|
|
46
|
+
if (!bulkStream) {
|
|
47
|
+
mkdirSync(dirname(bulkPath), { recursive: true });
|
|
48
|
+
bulkStream = createWriteStream(bulkPath);
|
|
49
|
+
logger.info(`Bulk JSONL stream created at ${relative(nuxt.options.rootDir, bulkPath)}`);
|
|
50
|
+
}
|
|
51
|
+
const { chunks, title, description, headings } = JSON.parse(route.contents || "{}");
|
|
52
|
+
const markdown = chunks.map((c) => c.content).join("\n\n");
|
|
53
|
+
await writer.write({
|
|
29
54
|
filePath: route.fileName,
|
|
30
|
-
url:
|
|
55
|
+
url: pageRoute,
|
|
31
56
|
title,
|
|
32
57
|
content: markdown,
|
|
33
58
|
metadata: {
|
|
34
59
|
description,
|
|
35
60
|
title
|
|
36
61
|
}
|
|
37
|
-
};
|
|
38
|
-
|
|
62
|
+
});
|
|
63
|
+
pageCount++;
|
|
64
|
+
logger.debug(`Processing ${chunks.length} chunks for route: ${pageRoute}`);
|
|
65
|
+
for (let idx = 0; idx < chunks.length; idx++) {
|
|
66
|
+
const chunk = chunks[idx];
|
|
67
|
+
if (!chunk)
|
|
68
|
+
continue;
|
|
69
|
+
const bulkChunk = {
|
|
70
|
+
id: generateVectorId(pageRoute, idx),
|
|
71
|
+
route: pageRoute,
|
|
72
|
+
chunkIndex: idx,
|
|
73
|
+
content: chunk.content,
|
|
74
|
+
headers: chunk.metadata?.headers,
|
|
75
|
+
loc: chunk.metadata?.loc,
|
|
76
|
+
title,
|
|
77
|
+
description
|
|
78
|
+
};
|
|
79
|
+
await nuxt.hooks.callHook("ai-ready:chunk", {
|
|
80
|
+
chunk: bulkChunk,
|
|
81
|
+
route: pageRoute,
|
|
82
|
+
title,
|
|
83
|
+
description,
|
|
84
|
+
headings
|
|
85
|
+
});
|
|
86
|
+
bulkStream.write(`${JSON.stringify(bulkChunk)}
|
|
87
|
+
`);
|
|
88
|
+
bulkStreamEntries++;
|
|
89
|
+
}
|
|
90
|
+
logger.debug(`Completed ${chunks.length} chunks for ${pageRoute}`);
|
|
39
91
|
route.contents = markdown;
|
|
40
92
|
});
|
|
41
93
|
nitro.hooks.hook("prerender:done", async () => {
|
|
42
|
-
if (
|
|
94
|
+
if (!writer) {
|
|
43
95
|
return;
|
|
44
96
|
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
files: pages,
|
|
50
|
-
generateFull: true,
|
|
51
|
-
siteName: siteConfig.name || siteConfig.url,
|
|
52
|
-
description: siteConfig.description
|
|
53
|
-
});
|
|
54
|
-
logger.success(`Generated markdown for ${pages.length} pages`);
|
|
55
|
-
const hookPayload = {
|
|
56
|
-
content: artifacts.llmsTxt || "",
|
|
57
|
-
fullContent: artifacts.llmsFullTxt || "",
|
|
58
|
-
pages
|
|
59
|
-
};
|
|
60
|
-
const llmsTxtConfig = nuxt.options.runtimeConfig["nuxt-ai-ready"].llmsTxt;
|
|
61
|
-
const normalizedContent = normalizeLlmsTxtConfig(llmsTxtConfig);
|
|
62
|
-
if (normalizedContent) {
|
|
63
|
-
hookPayload.content = `${hookPayload.content}
|
|
64
|
-
|
|
65
|
-
${normalizedContent}
|
|
66
|
-
`;
|
|
97
|
+
await writer.close();
|
|
98
|
+
if (bulkStream) {
|
|
99
|
+
bulkStream.end();
|
|
100
|
+
logger.info(`Bulk JSONL stream closed with ${bulkStreamEntries} chunks`);
|
|
67
101
|
}
|
|
68
|
-
|
|
69
|
-
const
|
|
70
|
-
const
|
|
71
|
-
|
|
72
|
-
if (finalLlmsTxt) {
|
|
73
|
-
const llmsTxtPath = join(nitro.options.output.publicDir, "llms.txt");
|
|
74
|
-
await writeFile(llmsTxtPath, finalLlmsTxt, "utf-8");
|
|
75
|
-
const sizeKb = (Buffer.byteLength(finalLlmsTxt, "utf-8") / 1024).toFixed(2);
|
|
76
|
-
generatedFiles.push({ path: "llms.txt", size: `${sizeKb}kb` });
|
|
77
|
-
nitro._prerenderedRoutes.push({
|
|
102
|
+
const llmsTxtPath = join(nitro.options.output.publicDir, "llms.txt");
|
|
103
|
+
const llmsFullTxtPath = join(nitro.options.output.publicDir, "llms-full.txt");
|
|
104
|
+
const files = [
|
|
105
|
+
{
|
|
78
106
|
route: "/llms.txt",
|
|
79
107
|
fileName: llmsTxtPath,
|
|
80
108
|
generateTimeMS: 0
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
if (finalLlmsFullTxt) {
|
|
84
|
-
const llmsFullTxtPath = join(nitro.options.output.publicDir, "llms-full.txt");
|
|
85
|
-
await writeFile(llmsFullTxtPath, finalLlmsFullTxt, "utf-8");
|
|
86
|
-
const sizeKb = (Buffer.byteLength(finalLlmsFullTxt, "utf-8") / 1024).toFixed(2);
|
|
87
|
-
generatedFiles.push({ path: "llms-full.txt", size: `${sizeKb}kb` });
|
|
88
|
-
nitro._prerenderedRoutes.push({
|
|
109
|
+
},
|
|
110
|
+
{
|
|
89
111
|
route: "/llms-full.txt",
|
|
90
112
|
fileName: llmsFullTxtPath,
|
|
91
113
|
generateTimeMS: 0
|
|
114
|
+
}
|
|
115
|
+
];
|
|
116
|
+
if (bulkStream) {
|
|
117
|
+
files.push({
|
|
118
|
+
route: "/content.jsonl",
|
|
119
|
+
fileName: bulkPath,
|
|
120
|
+
generateTimeMS: 0
|
|
92
121
|
});
|
|
93
122
|
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
123
|
+
const [llmsStats, llmsFullStats, bulkStats] = await Promise.all([
|
|
124
|
+
stat(llmsTxtPath),
|
|
125
|
+
stat(llmsFullTxtPath),
|
|
126
|
+
bulkStream ? stat(bulkPath) : Promise.resolve(null)
|
|
127
|
+
]);
|
|
128
|
+
nitro._prerenderedRoutes.push(...files);
|
|
129
|
+
const elapsed = Date.now() - startTime;
|
|
130
|
+
const llmsKb = (llmsStats.size / 1024).toFixed(2);
|
|
131
|
+
const llmsFullKb = (llmsFullStats.size / 1024).toFixed(2);
|
|
132
|
+
const bulkKb = bulkStats ? (bulkStats.size / 1024).toFixed(2) : "0";
|
|
133
|
+
logger.info(`Generated llms.txt (${llmsKb}kb), llms-full.txt (${llmsFullKb}kb), and content.jsonl (${bulkKb}kb) from ${pageCount} pages (${bulkStreamEntries} chunks) in ${elapsed}ms`);
|
|
99
134
|
});
|
|
100
135
|
});
|
|
101
136
|
}
|
|
102
137
|
|
|
103
|
-
function generateVectorId(route, chunkIdx) {
|
|
104
|
-
const hash = createHash("sha256").update(route).digest("hex").substring(0, 48);
|
|
105
|
-
return `${hash}-${chunkIdx}`;
|
|
106
|
-
}
|
|
107
138
|
const module = defineNuxtModule({
|
|
108
139
|
meta: {
|
|
109
140
|
name: "nuxt-ai-ready",
|
|
@@ -140,8 +171,8 @@ const module = defineNuxtModule({
|
|
|
140
171
|
};
|
|
141
172
|
},
|
|
142
173
|
async setup(config, nuxt) {
|
|
143
|
-
const { resolve
|
|
144
|
-
const { version } = await readPackageJSON(resolve
|
|
174
|
+
const { resolve } = createResolver(import.meta.url);
|
|
175
|
+
const { version } = await readPackageJSON(resolve("../package.json"));
|
|
145
176
|
logger.level = config.debug || nuxt.options.debug ? 4 : 3;
|
|
146
177
|
if (config.enabled === false) {
|
|
147
178
|
logger.debug("Module is disabled, skipping setup.");
|
|
@@ -149,15 +180,15 @@ const module = defineNuxtModule({
|
|
|
149
180
|
}
|
|
150
181
|
await installNuxtSiteConfig();
|
|
151
182
|
nuxt.options.nitro.alias = nuxt.options.nitro.alias || {};
|
|
152
|
-
nuxt.options.alias["#ai-ready"] = resolve
|
|
183
|
+
nuxt.options.alias["#ai-ready"] = resolve("./runtime");
|
|
153
184
|
if (!nuxt.options.mcp?.name) {
|
|
154
185
|
nuxt.options.mcp = nuxt.options.mcp || {};
|
|
155
186
|
nuxt.options.mcp.name = useSiteConfig().name;
|
|
156
187
|
}
|
|
157
188
|
nuxt.options.nitro.scanDirs = nuxt.options.nitro.scanDirs || [];
|
|
158
189
|
nuxt.options.nitro.scanDirs.push(
|
|
159
|
-
resolve
|
|
160
|
-
resolve
|
|
190
|
+
resolve("./runtime/server/utils"),
|
|
191
|
+
resolve("./runtime/server/mcp")
|
|
161
192
|
);
|
|
162
193
|
if (typeof config.contentSignal === "object") {
|
|
163
194
|
nuxt.options.robots.groups.push({
|
|
@@ -169,7 +200,7 @@ const module = defineNuxtModule({
|
|
|
169
200
|
addTypeTemplate({
|
|
170
201
|
filename: "module/nuxt-ai-ready.d.ts",
|
|
171
202
|
getContents: (data) => {
|
|
172
|
-
const typesPath = relative
|
|
203
|
+
const typesPath = relative$1(resolve(data.nuxt.options.rootDir, data.nuxt.options.buildDir, "module"), resolve("runtime/types"));
|
|
173
204
|
const nitroTypes = ` interface NitroRuntimeHooks {
|
|
174
205
|
'ai-ready:markdown': (context: import('${typesPath}').MarkdownContext) => void | Promise<void>
|
|
175
206
|
'ai-ready:mdreamConfig': (config: import('mdream').HTMLToMarkdownOptions) => void | Promise<void>
|
|
@@ -210,7 +241,7 @@ Returns JSONL (newline-delimited JSON) with all indexed content.`
|
|
|
210
241
|
const hasMCP = hasNuxtModule("@nuxtjs/mcp-toolkit");
|
|
211
242
|
if (hasMCP) {
|
|
212
243
|
nuxt.hook("mcp:definitions:paths", (paths) => {
|
|
213
|
-
const mcpRuntimeDir = resolve
|
|
244
|
+
const mcpRuntimeDir = resolve("./runtime/server/mcp");
|
|
214
245
|
paths.tools = paths.tools || [];
|
|
215
246
|
paths.resources = paths.resources || [];
|
|
216
247
|
paths.prompts = paths.prompts || [];
|
|
@@ -251,13 +282,13 @@ Returns JSONL (newline-delimited JSON) with all indexed content.`
|
|
|
251
282
|
};
|
|
252
283
|
addServerHandler({
|
|
253
284
|
middleware: true,
|
|
254
|
-
handler: resolve
|
|
285
|
+
handler: resolve("./runtime/server/middleware/mdream")
|
|
255
286
|
});
|
|
256
287
|
if (nuxt.options.build) {
|
|
257
|
-
addPlugin({ mode: "server", src: resolve
|
|
288
|
+
addPlugin({ mode: "server", src: resolve("./runtime/nuxt/plugins/prerender") });
|
|
258
289
|
}
|
|
259
290
|
if (nuxt.options.dev) {
|
|
260
|
-
addServerHandler({ route: "/llms.txt", handler: resolve
|
|
291
|
+
addServerHandler({ route: "/llms.txt", handler: resolve("./runtime/server/routes/llms.txt.get") });
|
|
261
292
|
}
|
|
262
293
|
const isStatic = nuxt.options.nitro.static || nuxt.options._generate || false;
|
|
263
294
|
if (isStatic || nuxt.options.nitro.prerender?.routes?.length) {
|
|
@@ -271,101 +302,6 @@ Returns JSONL (newline-delimited JSON) with all indexed content.`
|
|
|
271
302
|
}
|
|
272
303
|
};
|
|
273
304
|
}
|
|
274
|
-
const isBuildMode = !nuxt.options._prepare && !nuxt.options.dev;
|
|
275
|
-
nuxt.hooks.hook("modules:done", () => {
|
|
276
|
-
nuxt.hook("nitro:init", async (nitro) => {
|
|
277
|
-
if (!isBuildMode) {
|
|
278
|
-
logger.debug("Dev mode: skipping llms.txt generation");
|
|
279
|
-
return;
|
|
280
|
-
}
|
|
281
|
-
if (config.bulkRoute === false) {
|
|
282
|
-
logger.debug("Bulk route disabled, skipping bulk generation");
|
|
283
|
-
return;
|
|
284
|
-
}
|
|
285
|
-
const bulkPath = resolve(nitro.options.output.dir, `public${config.bulkRoute}`);
|
|
286
|
-
let bulkStream = null;
|
|
287
|
-
let bulkStreamEntries = 0;
|
|
288
|
-
nitro.hooks.hook("prerender:route", async (route) => {
|
|
289
|
-
const isHtml = route.fileName?.endsWith(".html") && route.contents.startsWith("<!DOCTYPE html");
|
|
290
|
-
if (!isHtml || !route.contents) {
|
|
291
|
-
return;
|
|
292
|
-
}
|
|
293
|
-
if (typeof route._sitemap !== "undefined" && !route._sitemap) {
|
|
294
|
-
return;
|
|
295
|
-
}
|
|
296
|
-
let title = "";
|
|
297
|
-
let description = "";
|
|
298
|
-
const headings = [];
|
|
299
|
-
const extractPlugin = extractionPlugin({
|
|
300
|
-
title(el) {
|
|
301
|
-
title = el.textContent;
|
|
302
|
-
},
|
|
303
|
-
'meta[name="description"]': (el) => {
|
|
304
|
-
description = el.attributes.content || "";
|
|
305
|
-
},
|
|
306
|
-
"h1, h2, h3, h4, h5, h6": (el) => {
|
|
307
|
-
const text = el.textContent?.trim();
|
|
308
|
-
const level = el.name.toLowerCase();
|
|
309
|
-
if (text)
|
|
310
|
-
headings.push({ [level]: text });
|
|
311
|
-
}
|
|
312
|
-
});
|
|
313
|
-
const options = {
|
|
314
|
-
origin: useSiteConfig().url,
|
|
315
|
-
...config.mdreamOptions || {}
|
|
316
|
-
};
|
|
317
|
-
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
318
|
-
const chunksStream = htmlToMarkdownSplitChunksStream(route.contents, {
|
|
319
|
-
...options,
|
|
320
|
-
headersToSplitOn: [TagIdMap.h1, TagIdMap.h2, TagIdMap.h3],
|
|
321
|
-
origin: useSiteConfig().url,
|
|
322
|
-
chunkSize: 256,
|
|
323
|
-
stripHeaders: false,
|
|
324
|
-
lengthFunction(text) {
|
|
325
|
-
return estimateTokenCount(text);
|
|
326
|
-
}
|
|
327
|
-
});
|
|
328
|
-
if (!bulkStream) {
|
|
329
|
-
mkdirSync(dirname(bulkPath), { recursive: true });
|
|
330
|
-
bulkStream = createWriteStream(bulkPath);
|
|
331
|
-
logger.info(`Bulk JSONL stream created at ${relative(nuxt.options.rootDir, bulkPath)}`);
|
|
332
|
-
}
|
|
333
|
-
logger.debug(`Processing chunks for route: ${route.route}`);
|
|
334
|
-
let idx = 0;
|
|
335
|
-
for await (const chunk of chunksStream) {
|
|
336
|
-
logger.debug(` Chunk ${idx}: ${chunk.content.length} chars, headers: ${JSON.stringify(chunk.metadata?.headers)}`);
|
|
337
|
-
const bulkChunk = {
|
|
338
|
-
id: generateVectorId(route.route, idx),
|
|
339
|
-
route: route.route,
|
|
340
|
-
chunkIndex: idx,
|
|
341
|
-
content: chunk.content,
|
|
342
|
-
headers: chunk.metadata?.headers,
|
|
343
|
-
loc: chunk.metadata?.loc,
|
|
344
|
-
title,
|
|
345
|
-
description
|
|
346
|
-
};
|
|
347
|
-
await nuxt.hooks.callHook("ai-ready:chunk", {
|
|
348
|
-
chunk: bulkChunk,
|
|
349
|
-
route: route.route,
|
|
350
|
-
title,
|
|
351
|
-
description,
|
|
352
|
-
headings
|
|
353
|
-
});
|
|
354
|
-
bulkStream.write(`${JSON.stringify(bulkChunk)}
|
|
355
|
-
`);
|
|
356
|
-
bulkStreamEntries++;
|
|
357
|
-
idx++;
|
|
358
|
-
}
|
|
359
|
-
logger.debug(`Completed ${idx} chunks for ${route.route}`);
|
|
360
|
-
});
|
|
361
|
-
nitro.hooks.hook("prerender:done", () => {
|
|
362
|
-
if (bulkStream) {
|
|
363
|
-
bulkStream.end();
|
|
364
|
-
logger.success(`Bulk JSONL exported ${bulkStreamEntries} entries.`);
|
|
365
|
-
}
|
|
366
|
-
});
|
|
367
|
-
});
|
|
368
|
-
});
|
|
369
305
|
}
|
|
370
306
|
});
|
|
371
307
|
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { withSiteUrl } from "#site-config/server/composables/utils";
|
|
2
2
|
import { createError, defineEventHandler, getHeader, setHeader } from "h3";
|
|
3
|
-
import { htmlToMarkdown } from "mdream";
|
|
3
|
+
import { htmlToMarkdown, TagIdMap } from "mdream";
|
|
4
4
|
import { extractionPlugin } from "mdream/plugins";
|
|
5
5
|
import { withMinimalPreset } from "mdream/preset/minimal";
|
|
6
|
+
import { htmlToMarkdownSplitChunksStream } from "mdream/splitter";
|
|
6
7
|
import { useNitroApp, useRuntimeConfig } from "nitropack/runtime";
|
|
8
|
+
import { estimateTokenCount } from "tokenx";
|
|
7
9
|
import { logger } from "../logger.js";
|
|
8
10
|
function shouldServeMarkdown(event) {
|
|
9
11
|
const accept = getHeader(event, "accept") || "";
|
|
@@ -20,12 +22,19 @@ async function convertHtmlToMarkdown(html, url, config, route, event) {
|
|
|
20
22
|
const nitroApp = useNitroApp();
|
|
21
23
|
let title = "";
|
|
22
24
|
let description = "";
|
|
25
|
+
const headings = [];
|
|
23
26
|
const extractPlugin = extractionPlugin({
|
|
24
27
|
title(el) {
|
|
25
28
|
title = el.textContent;
|
|
26
29
|
},
|
|
27
30
|
'meta[name="description"]': (el) => {
|
|
28
31
|
description = el.attributes.content || "";
|
|
32
|
+
},
|
|
33
|
+
"h1, h2, h3, h4, h5, h6": (el) => {
|
|
34
|
+
const text = el.textContent?.trim();
|
|
35
|
+
const level = el.name.toLowerCase();
|
|
36
|
+
if (text)
|
|
37
|
+
headings.push({ [level]: text });
|
|
29
38
|
}
|
|
30
39
|
});
|
|
31
40
|
let options = {
|
|
@@ -51,7 +60,51 @@ async function convertHtmlToMarkdown(html, url, config, route, event) {
|
|
|
51
60
|
};
|
|
52
61
|
await nitroApp.hooks.callHook("ai-ready:markdown", context);
|
|
53
62
|
markdown = context.markdown;
|
|
54
|
-
return { markdown, title, description };
|
|
63
|
+
return { markdown, title, description, headings };
|
|
64
|
+
}
|
|
65
|
+
async function convertHtmlToMarkdownChunks(html, url, config) {
|
|
66
|
+
let title = "";
|
|
67
|
+
let description = "";
|
|
68
|
+
const headings = [];
|
|
69
|
+
const extractPlugin = extractionPlugin({
|
|
70
|
+
title(el) {
|
|
71
|
+
title = el.textContent;
|
|
72
|
+
},
|
|
73
|
+
'meta[name="description"]': (el) => {
|
|
74
|
+
description = el.attributes.content || "";
|
|
75
|
+
},
|
|
76
|
+
"h1, h2, h3, h4, h5, h6": (el) => {
|
|
77
|
+
const text = el.textContent?.trim();
|
|
78
|
+
const level = el.name.toLowerCase();
|
|
79
|
+
if (text)
|
|
80
|
+
headings.push({ [level]: text });
|
|
81
|
+
}
|
|
82
|
+
});
|
|
83
|
+
let options = {
|
|
84
|
+
origin: url,
|
|
85
|
+
...config.mdreamOptions
|
|
86
|
+
};
|
|
87
|
+
if (config.mdreamOptions?.preset === "minimal") {
|
|
88
|
+
options = withMinimalPreset(options);
|
|
89
|
+
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
90
|
+
} else {
|
|
91
|
+
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
92
|
+
}
|
|
93
|
+
const chunksStream = htmlToMarkdownSplitChunksStream(html, {
|
|
94
|
+
...options,
|
|
95
|
+
headersToSplitOn: [TagIdMap.h1, TagIdMap.h2, TagIdMap.h3],
|
|
96
|
+
origin: url,
|
|
97
|
+
chunkSize: 256,
|
|
98
|
+
stripHeaders: false,
|
|
99
|
+
lengthFunction(text) {
|
|
100
|
+
return estimateTokenCount(text);
|
|
101
|
+
}
|
|
102
|
+
});
|
|
103
|
+
const chunks = [];
|
|
104
|
+
for await (const chunk of chunksStream) {
|
|
105
|
+
chunks.push(chunk);
|
|
106
|
+
}
|
|
107
|
+
return { chunks, title, description, headings };
|
|
55
108
|
}
|
|
56
109
|
export default defineEventHandler(async (event) => {
|
|
57
110
|
let path = event.path;
|
|
@@ -112,6 +165,14 @@ export default defineEventHandler(async (event) => {
|
|
|
112
165
|
}
|
|
113
166
|
return;
|
|
114
167
|
}
|
|
168
|
+
if (import.meta.prerender) {
|
|
169
|
+
const result2 = await convertHtmlToMarkdownChunks(
|
|
170
|
+
html,
|
|
171
|
+
withSiteUrl(event, path),
|
|
172
|
+
config
|
|
173
|
+
);
|
|
174
|
+
return JSON.stringify(result2);
|
|
175
|
+
}
|
|
115
176
|
const result = await convertHtmlToMarkdown(
|
|
116
177
|
html,
|
|
117
178
|
withSiteUrl(event, path),
|
|
@@ -120,13 +181,10 @@ export default defineEventHandler(async (event) => {
|
|
|
120
181
|
event
|
|
121
182
|
);
|
|
122
183
|
setHeader(event, "content-type", "text/markdown; charset=utf-8");
|
|
123
|
-
if (
|
|
184
|
+
if (config.markdownCacheHeaders) {
|
|
124
185
|
const { maxAge, swr } = config.markdownCacheHeaders;
|
|
125
186
|
const cacheControl = swr ? `public, max-age=${maxAge}, stale-while-revalidate=${maxAge}` : `public, max-age=${maxAge}`;
|
|
126
187
|
setHeader(event, "cache-control", cacheControl);
|
|
127
188
|
}
|
|
128
|
-
if (import.meta.prerender) {
|
|
129
|
-
return JSON.stringify(result);
|
|
130
|
-
}
|
|
131
189
|
return result.markdown;
|
|
132
190
|
});
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nuxt-ai-ready",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.1.
|
|
4
|
+
"version": "0.1.3",
|
|
5
5
|
"description": "Best practice AI & LLM discoverability for Nuxt sites.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
"@nuxt/kit": "4.2.1",
|
|
36
36
|
"consola": "^3.4.2",
|
|
37
37
|
"defu": "^6.1.4",
|
|
38
|
-
"mdream": "^0.
|
|
38
|
+
"mdream": "^0.15.0",
|
|
39
39
|
"minimatch": "^10.1.1",
|
|
40
40
|
"nuxt-site-config": "^3.2.11",
|
|
41
41
|
"pathe": "^2.0.3",
|