nuxt-ai-ready 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/module.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "nuxt": ">=4.0.0"
5
5
  },
6
6
  "configKey": "aiReady",
7
- "version": "0.1.2",
7
+ "version": "0.1.3",
8
8
  "builder": {
9
9
  "@nuxt/module-builder": "1.0.2",
10
10
  "unbuild": "3.6.1"
package/dist/module.mjs CHANGED
@@ -1,31 +1,37 @@
1
- import { createHash } from 'node:crypto';
2
- import { mkdirSync, createWriteStream } from 'node:fs';
3
1
  import { useLogger, useNuxt, defineNuxtModule, createResolver, addTypeTemplate, hasNuxtModule, addServerHandler, addPlugin } from '@nuxt/kit';
4
2
  import defu from 'defu';
5
- import { TagIdMap } from 'mdream';
6
- import { extractionPlugin } from 'mdream/plugins';
7
- import { htmlToMarkdownSplitChunksStream } from 'mdream/splitter';
8
3
  import { useSiteConfig, installNuxtSiteConfig, withSiteUrl } from 'nuxt-site-config/kit';
9
- import { isPathFile } from 'nuxt-site-config/urls';
10
- import { relative, resolve, dirname } from 'pathe';
4
+ import { relative as relative$1 } from 'pathe';
11
5
  import { readPackageJSON } from 'pkg-types';
12
- import { estimateTokenCount } from 'tokenx';
6
+ import { createHash } from 'node:crypto';
7
+ import { mkdirSync, createWriteStream } from 'node:fs';
13
8
  import { stat } from 'node:fs/promises';
14
- import { join } from 'node:path';
9
+ import { join, dirname, relative } from 'node:path';
15
10
  import { createLlmsTxtStream } from 'mdream/llms-txt';
16
11
 
17
12
  const logger = useLogger("nuxt-ai-ready");
18
13
 
14
+ function generateVectorId(route, chunkIdx) {
15
+ const hash = createHash("sha256").update(route).digest("hex").substring(0, 48);
16
+ return `${hash}-${chunkIdx}`;
17
+ }
19
18
  function setupPrerenderHandler() {
20
19
  const nuxt = useNuxt();
21
20
  nuxt.hooks.hook("nitro:init", async (nitro) => {
22
21
  let writer = null;
22
+ let bulkStream = null;
23
+ let bulkStreamEntries = 0;
23
24
  let pageCount = 0;
24
25
  const startTime = Date.now();
26
+ const bulkPath = join(nitro.options.output.publicDir, "content.jsonl");
25
27
  nitro.hooks.hook("prerender:generate", async (route) => {
26
28
  if (!route.fileName?.endsWith(".md")) {
27
29
  return;
28
30
  }
31
+ let pageRoute = route.route.replace(/\.md$/, "");
32
+ if (pageRoute === "/index") {
33
+ pageRoute = "/";
34
+ }
29
35
  if (!writer) {
30
36
  const siteConfig = useSiteConfig();
31
37
  const stream = createLlmsTxtStream({
@@ -37,10 +43,16 @@ function setupPrerenderHandler() {
37
43
  });
38
44
  writer = stream.getWriter();
39
45
  }
40
- const { markdown, title, description } = JSON.parse(route.contents || "{}");
46
+ if (!bulkStream) {
47
+ mkdirSync(dirname(bulkPath), { recursive: true });
48
+ bulkStream = createWriteStream(bulkPath);
49
+ logger.info(`Bulk JSONL stream created at ${relative(nuxt.options.rootDir, bulkPath)}`);
50
+ }
51
+ const { chunks, title, description, headings } = JSON.parse(route.contents || "{}");
52
+ const markdown = chunks.map((c) => c.content).join("\n\n");
41
53
  await writer.write({
42
54
  filePath: route.fileName,
43
- url: route.route,
55
+ url: pageRoute,
44
56
  title,
45
57
  content: markdown,
46
58
  metadata: {
@@ -49,6 +61,33 @@ function setupPrerenderHandler() {
49
61
  }
50
62
  });
51
63
  pageCount++;
64
+ logger.debug(`Processing ${chunks.length} chunks for route: ${pageRoute}`);
65
+ for (let idx = 0; idx < chunks.length; idx++) {
66
+ const chunk = chunks[idx];
67
+ if (!chunk)
68
+ continue;
69
+ const bulkChunk = {
70
+ id: generateVectorId(pageRoute, idx),
71
+ route: pageRoute,
72
+ chunkIndex: idx,
73
+ content: chunk.content,
74
+ headers: chunk.metadata?.headers,
75
+ loc: chunk.metadata?.loc,
76
+ title,
77
+ description
78
+ };
79
+ await nuxt.hooks.callHook("ai-ready:chunk", {
80
+ chunk: bulkChunk,
81
+ route: pageRoute,
82
+ title,
83
+ description,
84
+ headings
85
+ });
86
+ bulkStream.write(`${JSON.stringify(bulkChunk)}
87
+ `);
88
+ bulkStreamEntries++;
89
+ }
90
+ logger.debug(`Completed ${chunks.length} chunks for ${pageRoute}`);
52
91
  route.contents = markdown;
53
92
  });
54
93
  nitro.hooks.hook("prerender:done", async () => {
@@ -56,13 +95,13 @@ function setupPrerenderHandler() {
56
95
  return;
57
96
  }
58
97
  await writer.close();
98
+ if (bulkStream) {
99
+ bulkStream.end();
100
+ logger.info(`Bulk JSONL stream closed with ${bulkStreamEntries} chunks`);
101
+ }
59
102
  const llmsTxtPath = join(nitro.options.output.publicDir, "llms.txt");
60
103
  const llmsFullTxtPath = join(nitro.options.output.publicDir, "llms-full.txt");
61
- const [llmsStats, llmsFullStats] = await Promise.all([
62
- stat(llmsTxtPath),
63
- stat(llmsFullTxtPath)
64
- ]);
65
- nitro._prerenderedRoutes.push(
104
+ const files = [
66
105
  {
67
106
  route: "/llms.txt",
68
107
  fileName: llmsTxtPath,
@@ -73,19 +112,29 @@ function setupPrerenderHandler() {
73
112
  fileName: llmsFullTxtPath,
74
113
  generateTimeMS: 0
75
114
  }
76
- );
115
+ ];
116
+ if (bulkStream) {
117
+ files.push({
118
+ route: "/content.jsonl",
119
+ fileName: bulkPath,
120
+ generateTimeMS: 0
121
+ });
122
+ }
123
+ const [llmsStats, llmsFullStats, bulkStats] = await Promise.all([
124
+ stat(llmsTxtPath),
125
+ stat(llmsFullTxtPath),
126
+ bulkStream ? stat(bulkPath) : Promise.resolve(null)
127
+ ]);
128
+ nitro._prerenderedRoutes.push(...files);
77
129
  const elapsed = Date.now() - startTime;
78
130
  const llmsKb = (llmsStats.size / 1024).toFixed(2);
79
131
  const llmsFullKb = (llmsFullStats.size / 1024).toFixed(2);
80
- logger.info(`Generated llms.txt (${llmsKb}kb) and llms-full.txt (${llmsFullKb}kb) from ${pageCount} pages in ${elapsed}ms`);
132
+ const bulkKb = bulkStats ? (bulkStats.size / 1024).toFixed(2) : "0";
133
+ logger.info(`Generated llms.txt (${llmsKb}kb), llms-full.txt (${llmsFullKb}kb), and content.jsonl (${bulkKb}kb) from ${pageCount} pages (${bulkStreamEntries} chunks) in ${elapsed}ms`);
81
134
  });
82
135
  });
83
136
  }
84
137
 
85
- function generateVectorId(route, chunkIdx) {
86
- const hash = createHash("sha256").update(route).digest("hex").substring(0, 48);
87
- return `${hash}-${chunkIdx}`;
88
- }
89
138
  const module = defineNuxtModule({
90
139
  meta: {
91
140
  name: "nuxt-ai-ready",
@@ -122,8 +171,8 @@ const module = defineNuxtModule({
122
171
  };
123
172
  },
124
173
  async setup(config, nuxt) {
125
- const { resolve: resolve$1 } = createResolver(import.meta.url);
126
- const { version } = await readPackageJSON(resolve$1("../package.json"));
174
+ const { resolve } = createResolver(import.meta.url);
175
+ const { version } = await readPackageJSON(resolve("../package.json"));
127
176
  logger.level = config.debug || nuxt.options.debug ? 4 : 3;
128
177
  if (config.enabled === false) {
129
178
  logger.debug("Module is disabled, skipping setup.");
@@ -131,15 +180,15 @@ const module = defineNuxtModule({
131
180
  }
132
181
  await installNuxtSiteConfig();
133
182
  nuxt.options.nitro.alias = nuxt.options.nitro.alias || {};
134
- nuxt.options.alias["#ai-ready"] = resolve$1("./runtime");
183
+ nuxt.options.alias["#ai-ready"] = resolve("./runtime");
135
184
  if (!nuxt.options.mcp?.name) {
136
185
  nuxt.options.mcp = nuxt.options.mcp || {};
137
186
  nuxt.options.mcp.name = useSiteConfig().name;
138
187
  }
139
188
  nuxt.options.nitro.scanDirs = nuxt.options.nitro.scanDirs || [];
140
189
  nuxt.options.nitro.scanDirs.push(
141
- resolve$1("./runtime/server/utils"),
142
- resolve$1("./runtime/server/mcp")
190
+ resolve("./runtime/server/utils"),
191
+ resolve("./runtime/server/mcp")
143
192
  );
144
193
  if (typeof config.contentSignal === "object") {
145
194
  nuxt.options.robots.groups.push({
@@ -151,7 +200,7 @@ const module = defineNuxtModule({
151
200
  addTypeTemplate({
152
201
  filename: "module/nuxt-ai-ready.d.ts",
153
202
  getContents: (data) => {
154
- const typesPath = relative(resolve$1(data.nuxt.options.rootDir, data.nuxt.options.buildDir, "module"), resolve$1("runtime/types"));
203
+ const typesPath = relative$1(resolve(data.nuxt.options.rootDir, data.nuxt.options.buildDir, "module"), resolve("runtime/types"));
155
204
  const nitroTypes = ` interface NitroRuntimeHooks {
156
205
  'ai-ready:markdown': (context: import('${typesPath}').MarkdownContext) => void | Promise<void>
157
206
  'ai-ready:mdreamConfig': (config: import('mdream').HTMLToMarkdownOptions) => void | Promise<void>
@@ -192,7 +241,7 @@ Returns JSONL (newline-delimited JSON) with all indexed content.`
192
241
  const hasMCP = hasNuxtModule("@nuxtjs/mcp-toolkit");
193
242
  if (hasMCP) {
194
243
  nuxt.hook("mcp:definitions:paths", (paths) => {
195
- const mcpRuntimeDir = resolve$1("./runtime/server/mcp");
244
+ const mcpRuntimeDir = resolve("./runtime/server/mcp");
196
245
  paths.tools = paths.tools || [];
197
246
  paths.resources = paths.resources || [];
198
247
  paths.prompts = paths.prompts || [];
@@ -233,13 +282,13 @@ Returns JSONL (newline-delimited JSON) with all indexed content.`
233
282
  };
234
283
  addServerHandler({
235
284
  middleware: true,
236
- handler: resolve$1("./runtime/server/middleware/mdream")
285
+ handler: resolve("./runtime/server/middleware/mdream")
237
286
  });
238
287
  if (nuxt.options.build) {
239
- addPlugin({ mode: "server", src: resolve$1("./runtime/nuxt/plugins/prerender") });
288
+ addPlugin({ mode: "server", src: resolve("./runtime/nuxt/plugins/prerender") });
240
289
  }
241
290
  if (nuxt.options.dev) {
242
- addServerHandler({ route: "/llms.txt", handler: resolve$1("./runtime/server/routes/llms.txt.get") });
291
+ addServerHandler({ route: "/llms.txt", handler: resolve("./runtime/server/routes/llms.txt.get") });
243
292
  }
244
293
  const isStatic = nuxt.options.nitro.static || nuxt.options._generate || false;
245
294
  if (isStatic || nuxt.options.nitro.prerender?.routes?.length) {
@@ -253,104 +302,6 @@ Returns JSONL (newline-delimited JSON) with all indexed content.`
253
302
  }
254
303
  };
255
304
  }
256
- const isBuildMode = !nuxt.options._prepare && !nuxt.options.dev;
257
- nuxt.hooks.hook("modules:done", () => {
258
- nuxt.hook("nitro:init", async (nitro) => {
259
- if (!isBuildMode) {
260
- logger.debug("Dev mode: skipping llms.txt generation");
261
- return;
262
- }
263
- if (config.bulkRoute === false) {
264
- logger.debug("Bulk route disabled, skipping bulk generation");
265
- return;
266
- }
267
- const bulkPath = resolve(nitro.options.output.dir, `public${config.bulkRoute}`);
268
- let bulkStream = null;
269
- let bulkStreamEntries = 0;
270
- nitro.hooks.hook("prerender:route", async (route) => {
271
- const isHtml = route.fileName?.endsWith(".html") && route.contents.startsWith("<!DOCTYPE html");
272
- if (!isHtml || !route.contents) {
273
- return;
274
- }
275
- if (typeof route._sitemap !== "undefined" && !route._sitemap) {
276
- return;
277
- }
278
- if (isPathFile(route.route)) {
279
- return;
280
- }
281
- let title = "";
282
- let description = "";
283
- const headings = [];
284
- const extractPlugin = extractionPlugin({
285
- title(el) {
286
- title = el.textContent;
287
- },
288
- 'meta[name="description"]': (el) => {
289
- description = el.attributes.content || "";
290
- },
291
- "h1, h2, h3, h4, h5, h6": (el) => {
292
- const text = el.textContent?.trim();
293
- const level = el.name.toLowerCase();
294
- if (text)
295
- headings.push({ [level]: text });
296
- }
297
- });
298
- const options = {
299
- origin: useSiteConfig().url,
300
- ...config.mdreamOptions || {}
301
- };
302
- options.plugins = [extractPlugin, ...options.plugins || []];
303
- const chunksStream = htmlToMarkdownSplitChunksStream(route.contents, {
304
- ...options,
305
- headersToSplitOn: [TagIdMap.h1, TagIdMap.h2, TagIdMap.h3],
306
- origin: useSiteConfig().url,
307
- chunkSize: 256,
308
- stripHeaders: false,
309
- lengthFunction(text) {
310
- return estimateTokenCount(text);
311
- }
312
- });
313
- if (!bulkStream) {
314
- mkdirSync(dirname(bulkPath), { recursive: true });
315
- bulkStream = createWriteStream(bulkPath);
316
- logger.info(`Bulk JSONL stream created at ${relative(nuxt.options.rootDir, bulkPath)}`);
317
- }
318
- logger.debug(`Processing chunks for route: ${route.route}`);
319
- let idx = 0;
320
- for await (const chunk of chunksStream) {
321
- logger.debug(` Chunk ${idx}: ${chunk.content.length} chars, headers: ${JSON.stringify(chunk.metadata?.headers)}`);
322
- const bulkChunk = {
323
- id: generateVectorId(route.route, idx),
324
- route: route.route,
325
- chunkIndex: idx,
326
- content: chunk.content,
327
- headers: chunk.metadata?.headers,
328
- loc: chunk.metadata?.loc,
329
- title,
330
- description
331
- };
332
- await nuxt.hooks.callHook("ai-ready:chunk", {
333
- chunk: bulkChunk,
334
- route: route.route,
335
- title,
336
- description,
337
- headings
338
- });
339
- bulkStream.write(`${JSON.stringify(bulkChunk)}
340
- `);
341
- bulkStreamEntries++;
342
- idx++;
343
- }
344
- logger.debug(`Completed ${idx} chunks for ${route.route}`);
345
- });
346
- nitro.hooks.hook("prerender:done", () => {
347
- if (bulkStream) {
348
- bulkStream.end();
349
- logger.success(`Bulk JSONL exported ${bulkStreamEntries} entries.`);
350
- }
351
- });
352
- });
353
- });
354
305
  }
355
306
  });
356
307
 
@@ -1,9 +1,11 @@
1
1
  import { withSiteUrl } from "#site-config/server/composables/utils";
2
2
  import { createError, defineEventHandler, getHeader, setHeader } from "h3";
3
- import { htmlToMarkdown } from "mdream";
3
+ import { htmlToMarkdown, TagIdMap } from "mdream";
4
4
  import { extractionPlugin } from "mdream/plugins";
5
5
  import { withMinimalPreset } from "mdream/preset/minimal";
6
+ import { htmlToMarkdownSplitChunksStream } from "mdream/splitter";
6
7
  import { useNitroApp, useRuntimeConfig } from "nitropack/runtime";
8
+ import { estimateTokenCount } from "tokenx";
7
9
  import { logger } from "../logger.js";
8
10
  function shouldServeMarkdown(event) {
9
11
  const accept = getHeader(event, "accept") || "";
@@ -20,12 +22,19 @@ async function convertHtmlToMarkdown(html, url, config, route, event) {
20
22
  const nitroApp = useNitroApp();
21
23
  let title = "";
22
24
  let description = "";
25
+ const headings = [];
23
26
  const extractPlugin = extractionPlugin({
24
27
  title(el) {
25
28
  title = el.textContent;
26
29
  },
27
30
  'meta[name="description"]': (el) => {
28
31
  description = el.attributes.content || "";
32
+ },
33
+ "h1, h2, h3, h4, h5, h6": (el) => {
34
+ const text = el.textContent?.trim();
35
+ const level = el.name.toLowerCase();
36
+ if (text)
37
+ headings.push({ [level]: text });
29
38
  }
30
39
  });
31
40
  let options = {
@@ -51,7 +60,51 @@ async function convertHtmlToMarkdown(html, url, config, route, event) {
51
60
  };
52
61
  await nitroApp.hooks.callHook("ai-ready:markdown", context);
53
62
  markdown = context.markdown;
54
- return { markdown, title, description };
63
+ return { markdown, title, description, headings };
64
+ }
65
+ async function convertHtmlToMarkdownChunks(html, url, config) {
66
+ let title = "";
67
+ let description = "";
68
+ const headings = [];
69
+ const extractPlugin = extractionPlugin({
70
+ title(el) {
71
+ title = el.textContent;
72
+ },
73
+ 'meta[name="description"]': (el) => {
74
+ description = el.attributes.content || "";
75
+ },
76
+ "h1, h2, h3, h4, h5, h6": (el) => {
77
+ const text = el.textContent?.trim();
78
+ const level = el.name.toLowerCase();
79
+ if (text)
80
+ headings.push({ [level]: text });
81
+ }
82
+ });
83
+ let options = {
84
+ origin: url,
85
+ ...config.mdreamOptions
86
+ };
87
+ if (config.mdreamOptions?.preset === "minimal") {
88
+ options = withMinimalPreset(options);
89
+ options.plugins = [extractPlugin, ...options.plugins || []];
90
+ } else {
91
+ options.plugins = [extractPlugin, ...options.plugins || []];
92
+ }
93
+ const chunksStream = htmlToMarkdownSplitChunksStream(html, {
94
+ ...options,
95
+ headersToSplitOn: [TagIdMap.h1, TagIdMap.h2, TagIdMap.h3],
96
+ origin: url,
97
+ chunkSize: 256,
98
+ stripHeaders: false,
99
+ lengthFunction(text) {
100
+ return estimateTokenCount(text);
101
+ }
102
+ });
103
+ const chunks = [];
104
+ for await (const chunk of chunksStream) {
105
+ chunks.push(chunk);
106
+ }
107
+ return { chunks, title, description, headings };
55
108
  }
56
109
  export default defineEventHandler(async (event) => {
57
110
  let path = event.path;
@@ -112,6 +165,14 @@ export default defineEventHandler(async (event) => {
112
165
  }
113
166
  return;
114
167
  }
168
+ if (import.meta.prerender) {
169
+ const result2 = await convertHtmlToMarkdownChunks(
170
+ html,
171
+ withSiteUrl(event, path),
172
+ config
173
+ );
174
+ return JSON.stringify(result2);
175
+ }
115
176
  const result = await convertHtmlToMarkdown(
116
177
  html,
117
178
  withSiteUrl(event, path),
@@ -120,13 +181,10 @@ export default defineEventHandler(async (event) => {
120
181
  event
121
182
  );
122
183
  setHeader(event, "content-type", "text/markdown; charset=utf-8");
123
- if (!import.meta.prerender && config.markdownCacheHeaders) {
184
+ if (config.markdownCacheHeaders) {
124
185
  const { maxAge, swr } = config.markdownCacheHeaders;
125
186
  const cacheControl = swr ? `public, max-age=${maxAge}, stale-while-revalidate=${maxAge}` : `public, max-age=${maxAge}`;
126
187
  setHeader(event, "cache-control", cacheControl);
127
188
  }
128
- if (import.meta.prerender) {
129
- return JSON.stringify(result);
130
- }
131
189
  return result.markdown;
132
190
  });
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "nuxt-ai-ready",
3
3
  "type": "module",
4
- "version": "0.1.2",
4
+ "version": "0.1.3",
5
5
  "description": "Best practice AI & LLM discoverability for Nuxt sites.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",