nuxt-ai-ready 0.3.7 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +8 -14
  2. package/dist/module.d.mts +13 -20
  3. package/dist/module.json +1 -1
  4. package/dist/module.mjs +384 -380
  5. package/dist/runtime/{llms-txt.d.ts → llms-txt-utils.d.ts} +2 -0
  6. package/dist/runtime/llms-txt-utils.js +114 -0
  7. package/dist/runtime/nuxt/plugins/{prerender.js → md-hints.prerender.js} +1 -1
  8. package/dist/runtime/server/mcp/resources/pages.d.ts +17 -0
  9. package/dist/runtime/server/mcp/{dev/resources → resources}/pages.js +5 -4
  10. package/dist/runtime/server/mcp/tools/list-pages.d.ts +16 -0
  11. package/dist/runtime/server/mcp/tools/list-pages.js +11 -0
  12. package/dist/runtime/server/mcp/tools/search-pages-fuzzy.d.ts +3 -0
  13. package/dist/runtime/server/mcp/tools/search-pages-fuzzy.js +25 -0
  14. package/dist/runtime/server/middleware/markdown.js +62 -0
  15. package/dist/runtime/server/middleware/markdown.prerender.d.ts +2 -0
  16. package/dist/runtime/server/middleware/markdown.prerender.js +35 -0
  17. package/dist/runtime/server/routes/llms-full.txt.get.d.ts +2 -0
  18. package/dist/runtime/server/routes/llms-full.txt.get.js +5 -0
  19. package/dist/runtime/server/routes/llms.txt.get.js +18 -18
  20. package/dist/runtime/server/utils/pageData.d.ts +25 -0
  21. package/dist/runtime/server/utils/pageData.js +25 -0
  22. package/dist/runtime/server/utils/sitemap.d.ts +6 -0
  23. package/dist/runtime/server/utils/sitemap.js +25 -0
  24. package/dist/runtime/server/utils.d.ts +15 -3
  25. package/dist/runtime/server/utils.js +93 -45
  26. package/dist/runtime/types.d.ts +25 -52
  27. package/dist/types.d.mts +1 -1
  28. package/package.json +16 -15
  29. package/dist/runtime/llms-txt.js +0 -35
  30. package/dist/runtime/server/mcp/dev/tools/list-pages.js +0 -10
  31. package/dist/runtime/server/mcp/dev/utils.js +0 -34
  32. package/dist/runtime/server/mcp/prod/resources/pages-chunks.js +0 -25
  33. package/dist/runtime/server/mcp/prod/resources/pages.js +0 -25
  34. package/dist/runtime/server/mcp/prod/tools/list-pages.js +0 -21
  35. package/dist/runtime/server/mcp/utils.d.ts +0 -3
  36. package/dist/runtime/server/mcp/utils.js +0 -7
  37. package/dist/runtime/server/middleware/mdream.js +0 -148
  38. package/dist/runtime/server/plugins/sitemap-lastmod.d.ts +0 -2
  39. package/dist/runtime/server/plugins/sitemap-lastmod.js +0 -22
  40. /package/dist/runtime/nuxt/plugins/{prerender.d.ts → md-hints.prerender.d.ts} +0 -0
  41. /package/dist/runtime/server/middleware/{mdream.d.ts → markdown.d.ts} +0 -0
package/dist/module.mjs CHANGED
@@ -1,19 +1,15 @@
1
- import { dirname, join } from 'node:path';
2
- import { useLogger, useNuxt, defineNuxtModule, createResolver, addTypeTemplate, hasNuxtModule, addServerHandler, addPlugin, extendPages } from '@nuxt/kit';
1
+ import { appendFile, mkdir, writeFile, stat, access } from 'node:fs/promises';
2
+ import { join, dirname } from 'node:path';
3
+ import { useLogger, useNuxt, defineNuxtModule, createResolver, addTypeTemplate, hasNuxtModule, addServerHandler, addPlugin } from '@nuxt/kit';
3
4
  import defu from 'defu';
4
5
  import { useSiteConfig, installNuxtSiteConfig, withSiteUrl } from 'nuxt-site-config/kit';
5
- import { relative } from 'pathe';
6
6
  import { readPackageJSON } from 'pkg-types';
7
7
  import * as p from '@clack/prompts';
8
8
  import { $fetch } from 'ofetch';
9
9
  import { isTest, isCI } from 'std-env';
10
- import { createHash } from 'node:crypto';
11
- import { mkdirSync, createWriteStream } from 'node:fs';
12
- import { mkdir, stat, open } from 'node:fs/promises';
13
- import { encodeLines } from '@toon-format/toon';
14
- import { createLlmsTxtStream } from 'mdream/llms-txt';
15
- import { createStorage } from 'unstorage';
16
- import fsDriver from 'unstorage/drivers/fs';
10
+ import { parseSitemapXml } from '@nuxtjs/sitemap/utils';
11
+ import { colorize } from 'consola/utils';
12
+ import { withBase } from 'ufo';
17
13
 
18
14
  const logger = useLogger("nuxt-ai-ready");
19
15
 
@@ -68,350 +64,320 @@ function hookNuxtSeoProLicense() {
68
64
  }
69
65
  }
70
66
 
71
- function createContentHashManager(options) {
72
- const { storagePath, debug = false } = options;
73
- let storage;
74
- let manifest = {
75
- pages: {},
76
- version: "1"
77
- };
78
- async function initStorage() {
79
- await mkdir(dirname(storagePath), { recursive: true });
80
- storage = createStorage({
81
- driver: fsDriver({ base: dirname(storagePath) })
82
- });
67
+ function normalizeLink(link) {
68
+ const parts = [];
69
+ parts.push(`- [${link.title}](${link.href})`);
70
+ if (link.description)
71
+ parts.push(` ${link.description}`);
72
+ return parts.join("\n");
73
+ }
74
+ function normalizeSection(section) {
75
+ const parts = [];
76
+ parts.push(`## ${section.title}`);
77
+ parts.push("");
78
+ if (section.description) {
79
+ const descriptions = Array.isArray(section.description) ? section.description : [section.description];
80
+ parts.push(...descriptions);
81
+ parts.push("");
83
82
  }
84
- function hashContent(markdown) {
85
- return createHash("sha256").update(markdown).digest("hex");
83
+ if (section.links?.length)
84
+ parts.push(...section.links.map(normalizeLink));
85
+ return parts.join("\n");
86
+ }
87
+ function normalizeLlmsTxtConfig(config) {
88
+ const parts = [];
89
+ if (config.sections?.length)
90
+ parts.push(...config.sections.map(normalizeSection));
91
+ if (config.notes) {
92
+ parts.push("## Notes");
93
+ parts.push("");
94
+ const notes = Array.isArray(config.notes) ? config.notes : [config.notes];
95
+ parts.push(...notes);
86
96
  }
87
- async function getManifest() {
88
- if (!storage) {
89
- await initStorage();
90
- }
91
- const stored = await storage.getItem("content-hashes.json");
92
- if (stored) {
93
- manifest = stored;
94
- if (debug) {
95
- logger.debug(`Loaded manifest with ${Object.keys(manifest.pages).length} pages`);
96
- }
97
- } else {
98
- if (debug) {
99
- logger.debug("No existing manifest found, starting fresh");
100
- }
97
+ return parts.join("\n\n");
98
+ }
99
+ function createCrawlerState(pageDataPath, llmsFullTxtPath, siteInfo, llmsTxtConfig) {
100
+ return {
101
+ prerenderedRoutes: /* @__PURE__ */ new Set(),
102
+ totalProcessingTime: 0,
103
+ initialized: false,
104
+ jsonlInitialized: false,
105
+ pageDataPath,
106
+ llmsFullTxtPath,
107
+ siteInfo,
108
+ llmsTxtConfig
109
+ };
110
+ }
111
+ function buildLlmsFullTxtHeader(siteInfo, llmsTxtConfig) {
112
+ const parts = [];
113
+ parts.push(`# ${siteInfo?.name || siteInfo?.url || "Site"}`);
114
+ if (siteInfo?.description)
115
+ parts.push(`
116
+ > ${siteInfo.description}`);
117
+ if (siteInfo?.url)
118
+ parts.push(`
119
+ Canonical Origin: ${siteInfo.url}`);
120
+ parts.push("");
121
+ if (llmsTxtConfig) {
122
+ const normalizedContent = normalizeLlmsTxtConfig(llmsTxtConfig);
123
+ if (normalizedContent) {
124
+ parts.push(normalizedContent);
125
+ parts.push("");
101
126
  }
102
- return manifest;
103
127
  }
104
- async function saveManifest() {
105
- if (!storage) {
106
- await initStorage();
107
- }
108
- await storage.setItem("content-hashes.json", manifest);
109
- if (debug) {
110
- logger.debug(`Saved manifest with ${Object.keys(manifest.pages).length} pages`);
111
- }
128
+ parts.push("## Pages\n\n");
129
+ return parts.join("\n");
130
+ }
131
+ async function initCrawler(state) {
132
+ if (state.initialized)
133
+ return;
134
+ if (state.pageDataPath) {
135
+ await mkdir(dirname(state.pageDataPath), { recursive: true });
136
+ await writeFile(state.pageDataPath, "", "utf-8");
137
+ state.jsonlInitialized = true;
138
+ logger.debug(`Crawler initialized with JSONL at ${state.pageDataPath}`);
112
139
  }
113
- function updatePageHash(route, markdown, previousManifest) {
114
- const contentHash = hashContent(markdown);
115
- const now = (/* @__PURE__ */ new Date()).toISOString();
116
- const existing = previousManifest.pages[route];
117
- let result;
118
- if (!existing) {
119
- result = {
120
- contentHash,
121
- updatedAt: now,
122
- firstSeenAt: now
123
- };
124
- if (debug) {
125
- logger.debug(`New page detected: ${route}`);
126
- }
127
- } else if (existing.contentHash !== contentHash) {
128
- result = {
129
- contentHash,
130
- updatedAt: now,
131
- firstSeenAt: existing.firstSeenAt
132
- };
133
- if (debug) {
134
- logger.debug(`Content changed: ${route}`);
135
- }
136
- } else {
137
- result = {
138
- contentHash: existing.contentHash,
139
- updatedAt: existing.updatedAt,
140
- firstSeenAt: existing.firstSeenAt
141
- };
142
- if (debug) {
143
- logger.debug(`Content unchanged: ${route}`);
144
- }
145
- }
146
- manifest.pages[route] = result;
147
- return result;
140
+ if (state.llmsFullTxtPath) {
141
+ await mkdir(dirname(state.llmsFullTxtPath), { recursive: true });
142
+ const header = buildLlmsFullTxtHeader(state.siteInfo, state.llmsTxtConfig);
143
+ await writeFile(state.llmsFullTxtPath, header, "utf-8");
144
+ logger.debug(`llms-full.txt initialized at ${state.llmsFullTxtPath}`);
145
+ }
146
+ state.initialized = true;
147
+ }
148
+ function flattenHeadings(headings) {
149
+ return (headings || []).map((h) => Object.entries(h).map(([tag, text]) => `${tag}:${text}`).join("")).join("|");
150
+ }
151
+ function stripFrontmatter(markdown) {
152
+ return markdown.replace(/^---\n[\s\S]*?\n---\n*/, "");
153
+ }
154
+ function normalizeHeadings(markdown) {
155
+ return markdown.replace(/^(#{1,6})\s+(.+)$/gm, (_, hashes, text) => {
156
+ const level = hashes.length;
157
+ return `h${level}. ${text}`;
158
+ });
159
+ }
160
+ function formatPageForLlmsFullTxt(route, title, description, markdown, siteUrl) {
161
+ const canonicalUrl = siteUrl ? `${siteUrl.replace(/\/$/, "")}${route}` : route;
162
+ const heading = title && title !== route ? `### ${title}` : `### ${route}`;
163
+ let content = stripFrontmatter(markdown);
164
+ content = normalizeHeadings(content);
165
+ const parts = [heading, ""];
166
+ parts.push(`Source: ${canonicalUrl}`);
167
+ if (description)
168
+ parts.push(`Description: ${description}`);
169
+ parts.push("");
170
+ if (content.trim()) {
171
+ parts.push(content.trim());
172
+ parts.push("");
173
+ }
174
+ parts.push("---");
175
+ parts.push("");
176
+ return `${parts.join("\n")}
177
+ `;
178
+ }
179
+ async function processMarkdownRoute(state, nuxt, route, parsed, lastmod, options) {
180
+ const { markdown, title, description, headings, updatedAt: metaUpdatedAt } = parsed;
181
+ let updatedAt = (lastmod instanceof Date ? lastmod.toISOString() : lastmod) || (/* @__PURE__ */ new Date()).toISOString();
182
+ if (metaUpdatedAt) {
183
+ const parsedDate = new Date(metaUpdatedAt);
184
+ if (!Number.isNaN(parsedDate.getTime()))
185
+ updatedAt = parsedDate.toISOString();
148
186
  }
149
- function setPageTimestamp(route, markdown, timestamp, previousManifest) {
150
- const contentHash = hashContent(markdown);
151
- const existing = previousManifest.pages[route];
152
- manifest.pages[route] = {
153
- contentHash,
154
- updatedAt: timestamp,
155
- firstSeenAt: existing?.firstSeenAt || timestamp
187
+ await nuxt.hooks.callHook("ai-ready:page:markdown", { route, markdown, title, description, headings });
188
+ if (state.jsonlInitialized && state.pageDataPath) {
189
+ const pageData = {
190
+ route,
191
+ title,
192
+ description,
193
+ headings: flattenHeadings(headings),
194
+ updatedAt,
195
+ markdown
156
196
  };
157
- if (debug) {
158
- logger.debug(`Manual timestamp set for ${route}: ${timestamp}`);
197
+ await appendFile(state.pageDataPath, `${JSON.stringify(pageData)}
198
+ `, "utf-8");
199
+ }
200
+ state.prerenderedRoutes.add(route);
201
+ }
202
+ async function crawlSitemapEntries(state, nuxt, nitro, entries) {
203
+ logger.debug(`Crawling ${entries.length} sitemap entries`);
204
+ let crawled = 0;
205
+ let skipped = 0;
206
+ for (const entry of entries) {
207
+ const loc = typeof entry === "string" ? entry : entry.loc;
208
+ const lastmod = typeof entry === "string" ? void 0 : entry.lastmod;
209
+ const route = loc.startsWith("http") ? new URL(loc).pathname : loc;
210
+ if (route.split("/").some((segment) => segment.startsWith("_"))) {
211
+ skipped++;
212
+ continue;
213
+ }
214
+ if (state.prerenderedRoutes.has(route)) {
215
+ skipped++;
216
+ continue;
159
217
  }
218
+ const mdRoute = route === "/" ? "/index.md" : `${route}.md`;
219
+ const mdUrl = withBase(mdRoute, nitro.options.baseURL);
220
+ logger.debug(`Fetching markdown for ${route} \u2192 ${mdUrl}`);
221
+ const res = await globalThis.$fetch(mdUrl, {
222
+ headers: { "x-nitro-prerender": mdRoute }
223
+ }).catch((err) => {
224
+ logger.debug(`Failed to fetch ${mdUrl}: ${err.message}`);
225
+ return null;
226
+ });
227
+ if (!res)
228
+ continue;
229
+ const parsed = JSON.parse(res);
230
+ await processMarkdownRoute(state, nuxt, route, parsed, lastmod);
231
+ crawled++;
160
232
  }
233
+ logger.debug(`Sitemap crawl complete: ${crawled} crawled, ${skipped} skipped (already indexed)`);
234
+ return crawled;
235
+ }
236
+ async function crawlSitemapContent(state, nuxt, nitro, sitemapContent) {
237
+ logger.debug(`Parsing sitemap XML (${sitemapContent.length} bytes)`);
238
+ const result = await parseSitemapXml(sitemapContent);
239
+ const urls = result?.urls || [];
240
+ logger.debug(`Found ${urls.length} URLs in sitemap`);
241
+ return crawlSitemapEntries(state, nuxt, nitro, urls);
242
+ }
243
+ function isNuxtGenerate() {
244
+ return process.argv.includes("generate") || process.env.NUXT_GENERATE === "true" || process.env.prerender === "true";
245
+ }
246
+ function resolveNitroPreset() {
247
+ return process.env.NITRO_PRESET || process.env.SERVER_PRESET;
248
+ }
249
+ function includesSitemapRoot(sitemapName, routes) {
250
+ return routes.some((r) => r === `/${sitemapName}` || r.startsWith(`/${sitemapName}/`));
251
+ }
252
+ function detectSitemapPrerender(sitemapName = "sitemap.xml") {
253
+ const nuxt = useNuxt();
254
+ const prerenderedRoutes = nuxt.options.nitro.prerender?.routes || [];
255
+ const hasSitemapModule = nuxt.options._installedModules?.some(
256
+ (m) => m.meta?.name === "@nuxtjs/sitemap"
257
+ );
258
+ let prerenderSitemap = hasSitemapModule || isNuxtGenerate() || includesSitemapRoot(sitemapName, prerenderedRoutes);
259
+ if (resolveNitroPreset() === "vercel-edge")
260
+ prerenderSitemap = true;
261
+ const hasPrerender = !!(nuxt.options.nitro.prerender?.routes?.length || nuxt.options.nitro.prerender?.crawlLinks);
262
+ const shouldHookIntoPrerender = prerenderSitemap || hasPrerender;
263
+ logger.debug(`Sitemap detection: module=${hasSitemapModule}, generate=${isNuxtGenerate()}, routes=${includesSitemapRoot(sitemapName, prerenderedRoutes)}`);
161
264
  return {
162
- getManifest,
163
- saveManifest,
164
- hashContent,
165
- updatePageHash,
166
- setPageTimestamp
265
+ useSitemapHook: prerenderSitemap,
266
+ usePrerenderHook: shouldHookIntoPrerender && !prerenderSitemap
167
267
  };
168
268
  }
169
-
170
- function generateVectorId(route, chunkIdx) {
171
- const hash = createHash("sha256").update(route).digest("hex").substring(0, 8);
172
- return `${hash}-${chunkIdx}`;
173
- }
174
- async function updateFirstLine(filePath, newFirstLine) {
175
- const fh = await open(filePath, "r+");
176
- try {
177
- const buffer = Buffer.alloc(1024);
178
- await fh.read(buffer, 0, 1024, 0);
179
- const content = buffer.toString("utf-8");
180
- const firstLineEnd = content.indexOf("\n");
181
- const oldFirstLine = content.substring(0, firstLineEnd);
182
- const paddedLine = newFirstLine.padEnd(oldFirstLine.length, " ");
183
- await fh.write(paddedLine, 0, "utf-8");
184
- } finally {
185
- await fh.close();
186
- }
269
+ async function prerenderRoute(nitro, route) {
270
+ const start = Date.now();
271
+ const encodedRoute = encodeURI(route);
272
+ const fetchUrl = withBase(encodedRoute, nitro.options.baseURL);
273
+ const res = await globalThis.$fetch.raw(fetchUrl, {
274
+ headers: { "x-nitro-prerender": encodedRoute },
275
+ retry: nitro.options.prerender.retry,
276
+ retryDelay: nitro.options.prerender.retryDelay
277
+ });
278
+ const filePath = join(nitro.options.output.publicDir, route);
279
+ await mkdir(dirname(filePath), { recursive: true });
280
+ const data = res._data;
281
+ if (data === void 0)
282
+ throw new Error(`No data returned from '${fetchUrl}'`);
283
+ await writeFile(filePath, data, "utf8");
284
+ const _route = {
285
+ route,
286
+ fileName: filePath,
287
+ generateTimeMS: Date.now() - start
288
+ };
289
+ nitro._prerenderedRoutes.push(_route);
290
+ return stat(filePath);
187
291
  }
188
- function setupPrerenderHandler(llmsTxtConfig, timestampsConfig) {
292
+ function setupPrerenderHandler(pageDataPath, siteInfo, llmsTxtConfig) {
189
293
  const nuxt = useNuxt();
190
294
  nuxt.hooks.hook("nitro:init", async (nitro) => {
191
- let writer = null;
192
- let chunksStream = null;
193
- let pagesStream = null;
194
- let chunksProcessed = 0;
195
- let pageCount = 0;
196
- const startTime = Date.now();
197
- const pagesChunksPath = join(nitro.options.output.publicDir, "llms-full.toon");
198
- const pagesPath = join(nitro.options.output.publicDir, "llms.toon");
199
- let contentHashManager = null;
200
- let previousManifest = null;
201
- if (timestampsConfig?.enabled) {
202
- const manifestPath = join(
203
- nuxt.options.rootDir,
204
- timestampsConfig.manifestPath || "node_modules/.cache/nuxt-seo/ai-index/content-hashes.json"
205
- );
206
- contentHashManager = createContentHashManager({
207
- storagePath: manifestPath,
208
- debug: !!nuxt.options.debug
209
- });
210
- }
295
+ const llmsFullTxtPath = join(nitro.options.output.publicDir, "llms-full.txt");
296
+ const state = createCrawlerState(pageDataPath, llmsFullTxtPath, siteInfo, llmsTxtConfig);
297
+ let initPromise = null;
211
298
  nitro.hooks.hook("prerender:generate", async (route) => {
212
- if (route.route === "/sitemap.xml" && contentHashManager) {
213
- await contentHashManager.saveManifest();
214
- logger.debug("Saved content hash manifest before sitemap generation");
215
- }
216
- if (!route.fileName?.endsWith(".md")) {
299
+ if (!route.fileName?.endsWith(".md"))
217
300
  return;
218
- }
219
301
  let pageRoute = route.route.replace(/\.md$/, "");
220
- if (pageRoute === "/index") {
302
+ if (pageRoute === "/index")
221
303
  pageRoute = "/";
222
- }
223
- if (!writer) {
224
- const siteConfig = useSiteConfig();
225
- const stream = createLlmsTxtStream({
226
- siteName: siteConfig.name || siteConfig.url,
227
- description: siteConfig.description,
228
- origin: siteConfig.url,
229
- generateFull: true,
230
- outputDir: nitro.options.output.publicDir,
231
- sections: llmsTxtConfig.sections,
232
- notes: llmsTxtConfig.notes
233
- });
234
- writer = stream.getWriter();
235
- if (contentHashManager && !previousManifest) {
236
- previousManifest = await contentHashManager.getManifest();
237
- }
238
- mkdirSync(dirname(pagesChunksPath), { recursive: true });
239
- mkdirSync(dirname(pagesPath), { recursive: true });
240
- chunksStream = createWriteStream(pagesChunksPath, { encoding: "utf-8" });
241
- chunksStream.write("pageChunks[999999]{id,route,content}:\n");
242
- pagesStream = createWriteStream(pagesPath, { encoding: "utf-8" });
243
- pagesStream.write("pages[999999]{route,title,description,headings,chunkIds,updatedAt}:\n");
244
- }
245
- const { markdown, chunks, title, description, headings, updatedAt: metaUpdatedAt } = JSON.parse(route.contents || "{}");
246
- let pageTimestamp = {
247
- updatedAt: (/* @__PURE__ */ new Date()).toISOString()
248
- };
249
- let usedMetaTimestamp = false;
304
+ const pageStartTime = Date.now();
305
+ if (!initPromise)
306
+ initPromise = initCrawler(state);
307
+ await initPromise;
308
+ const parsed = JSON.parse(route.contents || "{}");
309
+ const { markdown, title, description, headings, updatedAt: metaUpdatedAt } = parsed;
310
+ let updatedAt = (/* @__PURE__ */ new Date()).toISOString();
250
311
  if (metaUpdatedAt) {
251
312
  const parsedDate = new Date(metaUpdatedAt);
252
- if (!Number.isNaN(parsedDate.getTime())) {
253
- pageTimestamp.updatedAt = parsedDate.toISOString();
254
- usedMetaTimestamp = true;
255
- if (contentHashManager && previousManifest) {
256
- contentHashManager.setPageTimestamp(pageRoute, markdown, pageTimestamp.updatedAt, previousManifest);
257
- }
258
- }
259
- }
260
- if (!usedMetaTimestamp && contentHashManager && previousManifest) {
261
- pageTimestamp = contentHashManager.updatePageHash(
262
- pageRoute,
263
- markdown,
264
- previousManifest
265
- );
313
+ if (!Number.isNaN(parsedDate.getTime()))
314
+ updatedAt = parsedDate.toISOString();
266
315
  }
267
- await writer.write({
268
- url: pageRoute,
316
+ await nuxt.hooks.callHook("ai-ready:page:markdown", {
317
+ route: pageRoute,
318
+ markdown,
269
319
  title,
270
- content: markdown,
271
- metadata: {
272
- description,
273
- title
274
- }
320
+ description,
321
+ headings
275
322
  });
276
- pageCount++;
277
- logger.debug(`Processing ${chunks.length} chunks for route: ${pageRoute}`);
278
- const chunkIds = [];
279
- for (let idx = 0; idx < chunks.length; idx++) {
280
- const chunk = chunks[idx];
281
- if (!chunk)
282
- continue;
283
- const chunkId = generateVectorId(pageRoute, idx);
284
- chunkIds.push(chunkId);
285
- const bulkChunk = {
286
- id: chunkId,
287
- route: pageRoute,
288
- content: chunk.content
289
- };
290
- await nuxt.hooks.callHook("ai-ready:chunk", {
291
- chunk: bulkChunk,
323
+ if (state.jsonlInitialized && state.pageDataPath) {
324
+ const pageData = {
292
325
  route: pageRoute,
293
326
  title,
294
327
  description,
295
- headings: Object.entries(headings).flatMap(
296
- ([tag, texts]) => texts.map((text) => ({ [tag]: text }))
297
- )
298
- });
299
- if (chunksStream) {
300
- const lines = Array.from(encodeLines({ pageChunks: [bulkChunk] }));
301
- if (lines[1]) {
302
- chunksStream.write(`${lines[1]}
303
- `);
304
- }
305
- }
306
- chunksProcessed++;
328
+ headings: flattenHeadings(headings),
329
+ updatedAt,
330
+ markdown
331
+ };
332
+ await appendFile(state.pageDataPath, `${JSON.stringify(pageData)}
333
+ `, "utf-8");
307
334
  }
308
- logger.debug(`Completed ${chunks.length} chunks for ${pageRoute}`);
309
- const pageDoc = {
310
- route: pageRoute,
311
- title,
312
- description,
313
- // Convert headings object to readable string format (h1:Title|h2:Subtitle,...)
314
- headings: headings && Object.keys(headings).length ? Object.entries(headings).flatMap(
315
- ([tag, texts]) => texts.map((text) => `${tag}:${text}`)
316
- ).join("|") : "",
317
- // Join chunkIds array to comma-separated string
318
- chunkIds: chunkIds.join(","),
319
- updatedAt: pageTimestamp.updatedAt
320
- };
321
- if (pagesStream) {
322
- const lines = Array.from(encodeLines({ pages: [pageDoc] }));
323
- if (lines[1]) {
324
- pagesStream.write(`${lines[1]}
325
- `);
326
- }
335
+ if (state.llmsFullTxtPath) {
336
+ const pageContent = formatPageForLlmsFullTxt(pageRoute, title, description, markdown, state.siteInfo?.url);
337
+ await appendFile(state.llmsFullTxtPath, pageContent, "utf-8");
327
338
  }
339
+ state.prerenderedRoutes.add(pageRoute);
328
340
  route.contents = markdown;
341
+ state.totalProcessingTime += Date.now() - pageStartTime;
329
342
  });
330
- nitro.hooks.hook("prerender:done", async () => {
331
- if (!writer) {
332
- return;
333
- }
334
- await writer.close();
335
- if (chunksStream) {
336
- await new Promise((resolve, reject) => {
337
- chunksStream.on("error", reject);
338
- chunksStream.on("finish", resolve);
339
- chunksStream.end();
340
- });
341
- }
342
- if (pagesStream) {
343
- await new Promise((resolve, reject) => {
344
- pagesStream.on("error", reject);
345
- pagesStream.on("finish", resolve);
346
- pagesStream.end();
347
- });
348
- }
349
- await updateFirstLine(pagesChunksPath, `pageChunks[${chunksProcessed}]{id,route,content}:`);
350
- await updateFirstLine(pagesPath, `pages[${pageCount}]{route,title,description,headings,chunkIds,updatedAt}:`);
351
- if (contentHashManager) {
352
- await contentHashManager.saveManifest();
353
- logger.debug("Saved content hash manifest");
354
- }
355
- logger.info(`Wrote llms-full.toon with ${chunksProcessed} chunks`);
356
- logger.info(`Wrote llms.toon with ${pageCount} pages`);
357
- const llmsTxtPath = join(nitro.options.output.publicDir, "llms.txt");
358
- const llmsFullTxtPath = join(nitro.options.output.publicDir, "llms-full.txt");
359
- const files = [
360
- {
361
- route: "/llms.txt",
362
- fileName: llmsTxtPath,
363
- generateTimeMS: 0
364
- },
365
- {
366
- route: "/llms-full.txt",
367
- fileName: llmsFullTxtPath,
368
- generateTimeMS: 0
369
- },
370
- {
371
- route: "/llms-full.toon",
372
- fileName: pagesChunksPath,
373
- generateTimeMS: 0
374
- },
375
- {
376
- route: "/llms.toon",
377
- fileName: pagesPath,
378
- generateTimeMS: 0
379
- }
380
- ];
381
- const [llmsStats, llmsFullStats, pagesChunksStats, pagesStats] = await Promise.all([
382
- stat(llmsTxtPath),
383
- stat(llmsFullTxtPath),
384
- stat(pagesChunksPath),
385
- stat(pagesPath)
386
- ]);
387
- nitro._prerenderedRoutes.push(...files);
388
- const elapsed = Date.now() - startTime;
389
- const llmsKb = (llmsStats.size / 1024).toFixed(2);
390
- const llmsFullKb = (llmsFullStats.size / 1024).toFixed(2);
391
- const pagesChunksKb = (pagesChunksStats.size / 1024).toFixed(2);
392
- const pagesKb = (pagesStats.size / 1024).toFixed(2);
393
- logger.info(`Generated llms.txt (${llmsKb}kb), llms-full.txt (${llmsFullKb}kb), llms-full.toon (${pagesChunksKb}kb), and llms.toon (${pagesKb}kb) from ${pageCount} pages (${chunksProcessed} chunks) in ${elapsed}ms`);
394
- });
343
+ async function writeLlmsFiles() {
344
+ const llmsStats = await prerenderRoute(nitro, "/llms.txt");
345
+ const llmsFullStats = await stat(state.llmsFullTxtPath);
346
+ const kb = (b) => (b / 1024).toFixed(1);
347
+ const totalKb = kb(llmsStats.size + llmsFullStats.size);
348
+ const dim = (s) => colorize("dim", s);
349
+ const cyan = (s) => colorize("cyan", s);
350
+ const timeStr = state.totalProcessingTime >= 100 ? ` in ${cyan(`${(state.totalProcessingTime / 1e3).toFixed(1)}s`)}` : "";
351
+ logger.info(`Indexed ${cyan(String(state.prerenderedRoutes.size))} pages for llms.txt${timeStr} \u2192 ${cyan(`${totalKb}kb`)}`);
352
+ logger.info(dim(` llms.txt: ${kb(llmsStats.size)}kb, llms-full.txt: ${kb(llmsFullStats.size)}kb`));
353
+ }
354
+ const { useSitemapHook, usePrerenderHook } = detectSitemapPrerender();
355
+ logger.debug(`Prerender hooks: sitemap=${useSitemapHook}, prerender=${usePrerenderHook}`);
356
+ if (useSitemapHook) {
357
+ nuxt.hooks.hook("sitemap:prerender:done", async (ctx) => {
358
+ if (!state.initialized)
359
+ return;
360
+ for (const sitemap of ctx.sitemaps)
361
+ await crawlSitemapContent(state, nuxt, nitro, sitemap.content);
362
+ await writeLlmsFiles();
363
+ state.prerenderedRoutes.clear();
364
+ });
365
+ } else if (usePrerenderHook) {
366
+ nitro.hooks.hook("prerender:done", async () => {
367
+ if (!state.initialized)
368
+ return;
369
+ const sitemapContent = await globalThis.$fetch("/sitemap.xml", {
370
+ headers: { "x-nitro-prerender": "/sitemap.xml" }
371
+ }).catch(() => null);
372
+ if (sitemapContent)
373
+ await crawlSitemapContent(state, nuxt, nitro, sitemapContent);
374
+ await writeLlmsFiles();
375
+ state.prerenderedRoutes.clear();
376
+ });
377
+ }
395
378
  });
396
379
  }
397
380
 
398
- function createPagesPromise(nuxt = useNuxt()) {
399
- return new Promise((resolve) => {
400
- nuxt.hooks.hook("modules:done", () => {
401
- if (typeof nuxt.options.pages === "boolean" && !nuxt.options.pages || typeof nuxt.options.pages === "object" && !nuxt.options.pages.enabled) {
402
- return resolve([]);
403
- }
404
- extendPages(resolve);
405
- });
406
- });
407
- }
408
- function flattenPages(pages, parent = "") {
409
- return pages.flatMap((page) => {
410
- const path = parent + page.path;
411
- const current = { path, name: page.name, meta: page.meta };
412
- return page.children?.length ? [current, ...flattenPages(page.children, path)] : [current];
413
- });
414
- }
415
381
  const module$1 = defineNuxtModule({
416
382
  meta: {
417
383
  name: "nuxt-ai-ready",
@@ -424,6 +390,9 @@ const module$1 = defineNuxtModule({
424
390
  "@nuxtjs/robots": {
425
391
  version: ">=5.6.0"
426
392
  },
393
+ "@nuxtjs/sitemap": {
394
+ version: ">=7"
395
+ },
427
396
  "nuxt-site-config": {
428
397
  version: ">=3"
429
398
  },
@@ -444,10 +413,8 @@ const module$1 = defineNuxtModule({
444
413
  // 1 hour
445
414
  swr: true
446
415
  },
447
- timestamps: {
448
- enabled: false,
449
- manifestPath: "node_modules/.cache/nuxt-seo/ai-index/content-hashes.json"
450
- }
416
+ cacheMaxAgeSeconds: 600
417
+ // 10 minutes
451
418
  };
452
419
  },
453
420
  async setup(config, nuxt) {
@@ -470,16 +437,9 @@ const module$1 = defineNuxtModule({
470
437
  nuxt.options.nitro.scanDirs.push(
471
438
  resolve("./runtime/server/utils")
472
439
  );
473
- const pagesPromise = createPagesPromise(nuxt);
474
- nuxt.hooks.hook("nitro:config", (nitroConfig) => {
475
- nitroConfig.virtual = nitroConfig.virtual || {};
476
- nitroConfig.virtual["#ai-ready/routes.mjs"] = async () => {
477
- const pages = await pagesPromise;
478
- const routes = flattenPages(pages);
479
- return `export default ${JSON.stringify(routes)}`;
480
- };
481
- });
482
440
  if (typeof config.contentSignal === "object") {
441
+ nuxt.options.robots = nuxt.options.robots || {};
442
+ nuxt.options.robots.groups = nuxt.options.robots.groups || [];
483
443
  nuxt.options.robots.groups.push({
484
444
  userAgent: "*",
485
445
  contentUsage: [`train-ai=${config.contentSignal.aiTrain ? "y" : "n"}`],
@@ -487,54 +447,52 @@ const module$1 = defineNuxtModule({
487
447
  });
488
448
  }
489
449
  addTypeTemplate({
490
- filename: "module/nuxt-ai-ready.d.ts",
491
- getContents: (data) => {
492
- const typesPath = relative(resolve(data.nuxt.options.rootDir, data.nuxt.options.buildDir, "module"), resolve("runtime/types"));
493
- const nitroTypes = ` interface NitroRuntimeHooks {
494
- 'ai-ready:markdown': (context: import('${typesPath}').MarkdownContext) => void | Promise<void>
495
- 'ai-ready:mdreamConfig': (config: import('mdream').HTMLToMarkdownOptions) => void | Promise<void>
496
- }`;
497
- return `// Generated by nuxt-ai-ready
450
+ filename: "types/nuxt-ai-ready.d.ts",
451
+ getContents: () => `// Generated by nuxt-ai-ready
452
+ import type { MarkdownContext } from 'nuxt-ai-ready'
453
+ import type { HTMLToMarkdownOptions } from 'mdream'
454
+
498
455
  declare module 'nitropack/types' {
499
- ${nitroTypes}
456
+ interface NitroRuntimeHooks {
457
+ 'ai-ready:markdown': (context: MarkdownContext) => void | Promise<void>
458
+ 'ai-ready:mdreamConfig': (config: HTMLToMarkdownOptions) => void | Promise<void>
459
+ }
500
460
  }
501
461
 
502
- declare module 'nitropack' {
503
- ${nitroTypes}
462
+ declare module '#ai-ready-virtual/read-page-data.mjs' {
463
+ export function readPageDataFromFilesystem(): Promise<Array<{
464
+ route: string
465
+ title: string
466
+ description: string
467
+ headings: string
468
+ updatedAt: string
469
+ markdown: string
470
+ }> | null>
471
+ }
472
+
473
+ declare module '#ai-ready-virtual/page-data.mjs' {
474
+ export const pages: never[]
504
475
  }
505
476
 
506
477
  export {}
507
- `;
508
- }
509
- }, {
510
- nitro: true
511
- });
478
+ `
479
+ }, { nitro: true });
512
480
  const defaultLlmsTxtSections = [];
513
- const pagesRoute = withSiteUrl("llms.toon");
514
- const pagesChunksRoute = withSiteUrl("llms-full.toon");
481
+ const llmsFullRoute = withSiteUrl("llms-full.txt");
515
482
  defaultLlmsTxtSections.push({
516
483
  title: "LLM Resources",
517
484
  links: [
518
485
  {
519
- title: "Pages Minimal",
520
- href: pagesRoute,
521
- description: `Page-level metadata in TOON format (token-efficient JSON encoding, see https://toonformat.dev). Contains: route, title, description, headings, chunkIds. Use with llms-full.toon for complete content. Fields: { route, title, description, headings, chunkIds }.
522
-
523
- <code lang="bash">curl "${pagesRoute}"</code>`
524
- },
525
- {
526
- title: "Page Chunks",
527
- href: pagesChunksRoute,
528
- description: `Individual content chunks in TOON format for RAG/embeddings. Contains: id, route, content. Fields: { id, route, content }. Join with llms.toon using route to get title/description/headings metadata. Chunk index inferred from id suffix (e.g., "hash-0", "hash-1").
529
-
530
- <code lang="bash">curl "${pagesChunksRoute}"</code>`
486
+ title: "Full Content",
487
+ href: llmsFullRoute,
488
+ description: "Complete page content in markdown format."
531
489
  }
532
490
  ]
533
491
  });
534
492
  const hasMCP = hasNuxtModule("@nuxtjs/mcp-toolkit");
535
493
  if (hasMCP) {
536
494
  nuxt.hook("mcp:definitions:paths", (paths) => {
537
- const mcpRuntimeDir = resolve(`./runtime/server/mcp/${nuxt.options.dev ? "dev" : "prod"}`);
495
+ const mcpRuntimeDir = resolve(`./runtime/server/mcp`);
538
496
  const mcpConfig = config.mcp || {};
539
497
  if (mcpConfig.tools !== false)
540
498
  (paths.tools ||= []).push(`${mcpRuntimeDir}/tools`);
@@ -569,41 +527,87 @@ export {}
569
527
  await nuxt.callHook("ai-ready:llms-txt", llmsTxtPayload);
570
528
  mergedLlmsTxt.sections = llmsTxtPayload.sections;
571
529
  mergedLlmsTxt.notes = llmsTxtPayload.notes.length > 0 ? llmsTxtPayload.notes : void 0;
572
- const timestampsManifestPath = config.timestamps?.enabled ? join(nuxt.options.rootDir, config.timestamps.manifestPath || "node_modules/.cache/nuxt-seo/ai-index/content-hashes.json") : void 0;
530
+ const prerenderCacheDir = join(nuxt.options.rootDir, "node_modules/.cache/nuxt-seo/ai-ready/routes");
531
+ const pageDataPath = join(nuxt.options.buildDir, ".data/ai-ready/page-data.jsonl");
532
+ nuxt.hooks.hook("nitro:config", (nitroConfig) => {
533
+ nitroConfig.virtual = nitroConfig.virtual || {};
534
+ nitroConfig.virtual["#ai-ready-virtual/read-page-data.mjs"] = `
535
+ import { readFile } from 'node:fs/promises'
536
+
537
+ export async function readPageDataFromFilesystem() {
538
+ if (!import.meta.prerender) {
539
+ return null
540
+ }
541
+ const data = await readFile(${JSON.stringify(pageDataPath)}, 'utf-8').catch(() => null)
542
+ if (!data) return []
543
+ return data.trim().split('\\n').filter(Boolean).map(line => JSON.parse(line))
544
+ }
545
+ `;
546
+ nitroConfig.virtual["#ai-ready-virtual/page-data.mjs"] = `export const pages = []`;
547
+ });
573
548
  nuxt.options.runtimeConfig["nuxt-ai-ready"] = {
574
549
  version: version || "0.0.0",
575
550
  debug: config.debug || false,
576
- hasSitemap: hasNuxtModule("@nuxtjs/sitemap"),
577
551
  mdreamOptions: config.mdreamOptions || {},
578
552
  markdownCacheHeaders: defu(config.markdownCacheHeaders, {
579
553
  maxAge: 3600,
580
554
  swr: true
581
555
  }),
582
556
  llmsTxt: mergedLlmsTxt,
583
- timestampsManifestPath
557
+ cacheMaxAgeSeconds: config.cacheMaxAgeSeconds ?? 600,
558
+ prerenderCacheDir
584
559
  };
585
- if (config.timestamps?.enabled && hasNuxtModule("@nuxtjs/sitemap")) {
586
- nuxt.hook("nitro:config", (nitroConfig) => {
587
- nitroConfig.plugins = nitroConfig.plugins || [];
588
- nitroConfig.plugins.push(resolve("./runtime/server/plugins/sitemap-lastmod"));
589
- });
590
- }
591
560
  addServerHandler({
592
561
  middleware: true,
593
- handler: resolve("./runtime/server/middleware/mdream")
562
+ handler: resolve("./runtime/server/middleware/markdown.prerender")
563
+ });
564
+ addServerHandler({
565
+ middleware: true,
566
+ handler: resolve("./runtime/server/middleware/markdown")
594
567
  });
595
568
  if (nuxt.options.build) {
596
- addPlugin({ mode: "server", src: resolve("./runtime/nuxt/plugins/prerender") });
569
+ addPlugin({
570
+ mode: "server",
571
+ src: resolve("./runtime/nuxt/plugins/md-hints.prerender")
572
+ });
597
573
  }
598
574
  addServerHandler({ route: "/llms.txt", handler: resolve("./runtime/server/routes/llms.txt.get") });
599
- addServerHandler({ route: "/llms-full.txt", handler: resolve("./runtime/server/routes/llms.txt.get") });
575
+ addServerHandler({ route: "/llms-full.txt", handler: resolve("./runtime/server/routes/llms-full.txt.get") });
600
576
  const isStatic = nuxt.options.nitro.static || nuxt.options._generate || false;
601
- if (isStatic || nuxt.options.nitro.prerender?.routes?.length) {
602
- setupPrerenderHandler(mergedLlmsTxt, config.timestamps);
577
+ const hasPrerenderedRoutes = nuxt.options.nitro.prerender?.routes?.length;
578
+ const isSPA = nuxt.options.ssr === false;
579
+ if (!nuxt.options.dev && !nuxt.options._prepare) {
580
+ if (isSPA && !hasPrerenderedRoutes) {
581
+ logger.warn("SPA mode detected without prerendering. llms-full.txt will not be generated.");
582
+ logger.warn("For full functionality, enable SSR or prerender routes.");
583
+ } else if (!isStatic && !hasPrerenderedRoutes) {
584
+ logger.info("SSR-only mode: llms-full.txt requires prerendering. Runtime markdown conversion available.");
585
+ }
586
+ }
587
+ if (isStatic || hasPrerenderedRoutes) {
588
+ const siteConfig = useSiteConfig();
589
+ setupPrerenderHandler(pageDataPath, {
590
+ name: siteConfig.name,
591
+ url: siteConfig.url,
592
+ description: siteConfig.description
593
+ }, mergedLlmsTxt);
603
594
  }
604
595
  nuxt.options.nitro.routeRules = nuxt.options.nitro.routeRules || {};
605
- nuxt.options.nitro.routeRules["/llms.toon"] = { headers: { "Content-Type": "text/toon; charset=utf-8" } };
606
- nuxt.options.nitro.routeRules["/llms-full.toon"] = { headers: { "Content-Type": "text/toon; charset=utf-8" } };
596
+ nuxt.options.nitro.routeRules["/llms.txt"] = { headers: { "Content-Type": "text/plain; charset=utf-8" } };
597
+ nuxt.options.nitro.routeRules["/llms-full.txt"] = { headers: { "Content-Type": "text/plain; charset=utf-8" } };
598
+ nuxt.hooks.hook("nitro:build:before", (nitro) => {
599
+ nitro.hooks.hook("compiled", async () => {
600
+ const headersPath = join(nitro.options.output.publicDir, "_headers");
601
+ const exists = await access(headersPath).then(() => true).catch(() => false);
602
+ if (exists) {
603
+ await appendFile(headersPath, `
604
+ /*.md
605
+ Content-Type: text/markdown; charset=utf-8
606
+ `);
607
+ logger.debug("Appended .md charset header to _headers");
608
+ }
609
+ });
610
+ });
607
611
  }
608
612
  });
609
613