nuxt-ai-ready 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/module.json
CHANGED
package/dist/module.mjs
CHANGED
|
@@ -184,8 +184,7 @@ function setupPrerenderHandler(llmsTxtConfig, timestampsConfig) {
|
|
|
184
184
|
pagesStream = createWriteStream(pagesPath, { encoding: "utf-8" });
|
|
185
185
|
pagesStream.write("pages[999999]{route,title,description,headings,chunkIds,updatedAt}:\n");
|
|
186
186
|
}
|
|
187
|
-
const { chunks, title, description, headings, updatedAt: metaUpdatedAt } = JSON.parse(route.contents || "{}");
|
|
188
|
-
const markdown = chunks.map((c) => c.content).join("\n\n");
|
|
187
|
+
const { markdown, chunks, title, description, headings, updatedAt: metaUpdatedAt } = JSON.parse(route.contents || "{}");
|
|
189
188
|
let pageTimestamp = {
|
|
190
189
|
updatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
191
190
|
};
|
|
@@ -121,7 +121,7 @@ export default defineEventHandler(async (event) => {
|
|
|
121
121
|
return;
|
|
122
122
|
}
|
|
123
123
|
if (import.meta.prerender) {
|
|
124
|
-
const result2 = await convertHtmlToMarkdownChunks(
|
|
124
|
+
const result2 = convertHtmlToMarkdownChunks(
|
|
125
125
|
html,
|
|
126
126
|
withSiteUrl(event, path),
|
|
127
127
|
config.mdreamOptions
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import type { ModulePublicRuntimeConfig } from '../../module.js';
|
|
2
|
-
export declare function convertHtmlToMarkdownChunks(html: string, url: string, mdreamOptions: ModulePublicRuntimeConfig['mdreamOptions']): Promise<{
|
|
3
|
-
headings: Record<string, string[]>;
|
|
2
|
+
export declare function convertHtmlToMarkdownChunks(html: string, url: string, mdreamOptions: ModulePublicRuntimeConfig['mdreamOptions']): {
|
|
4
3
|
updatedAt?: string | undefined;
|
|
4
|
+
markdown: string;
|
|
5
5
|
chunks: import("mdream").MarkdownChunk[];
|
|
6
6
|
title: string;
|
|
7
7
|
description: string;
|
|
8
|
-
|
|
8
|
+
headings: Record<string, string[]>;
|
|
9
|
+
};
|
|
@@ -1,9 +1,17 @@
|
|
|
1
|
-
import { TagIdMap } from "mdream";
|
|
1
|
+
import { htmlToMarkdown, TagIdMap } from "mdream";
|
|
2
2
|
import { extractionPlugin } from "mdream/plugins";
|
|
3
3
|
import { withMinimalPreset } from "mdream/preset/minimal";
|
|
4
|
-
import {
|
|
4
|
+
import { htmlToMarkdownSplitChunks } from "mdream/splitter";
|
|
5
5
|
import { estimateTokenCount } from "tokenx";
|
|
6
|
-
|
|
6
|
+
function stripFrontmatter(text) {
|
|
7
|
+
if (!text.startsWith("---\n"))
|
|
8
|
+
return text;
|
|
9
|
+
const endIdx = text.indexOf("\n---", 4);
|
|
10
|
+
if (endIdx === -1)
|
|
11
|
+
return text;
|
|
12
|
+
return text.slice(endIdx + 4).trimStart();
|
|
13
|
+
}
|
|
14
|
+
export function convertHtmlToMarkdownChunks(html, url, mdreamOptions) {
|
|
7
15
|
let title = "";
|
|
8
16
|
let description = "";
|
|
9
17
|
let updatedAt;
|
|
@@ -14,7 +22,6 @@ export async function convertHtmlToMarkdownChunks(html, url, mdreamOptions) {
|
|
|
14
22
|
'meta[name="description"]': (el) => {
|
|
15
23
|
description = el.attributes.content || "";
|
|
16
24
|
},
|
|
17
|
-
// Extract timestamp from various meta tag formats
|
|
18
25
|
'meta[property="article:modified_time"], meta[name="last-modified"], meta[name="updated"], meta[property="og:updated_time"], meta[name="lastmod"]': (el) => {
|
|
19
26
|
if (!updatedAt && el.attributes.content) {
|
|
20
27
|
updatedAt = el.attributes.content;
|
|
@@ -31,33 +38,43 @@ export async function convertHtmlToMarkdownChunks(html, url, mdreamOptions) {
|
|
|
31
38
|
} else {
|
|
32
39
|
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
33
40
|
}
|
|
34
|
-
const
|
|
41
|
+
const rawMarkdown = htmlToMarkdown(html, options);
|
|
42
|
+
const markdown = stripFrontmatter(rawMarkdown);
|
|
43
|
+
const rawChunks = htmlToMarkdownSplitChunks(html, {
|
|
35
44
|
...options,
|
|
36
45
|
headersToSplitOn: [TagIdMap.h1, TagIdMap.h2, TagIdMap.h3],
|
|
37
46
|
origin: url,
|
|
38
|
-
chunkSize:
|
|
47
|
+
chunkSize: 512,
|
|
39
48
|
stripHeaders: false,
|
|
40
49
|
lengthFunction(text) {
|
|
41
50
|
return estimateTokenCount(text);
|
|
42
51
|
}
|
|
43
52
|
});
|
|
44
|
-
const chunks =
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
53
|
+
const chunks = rawChunks.filter((chunk, idx) => {
|
|
54
|
+
if (idx === 0 && chunk.content.startsWith("---\n")) {
|
|
55
|
+
const endIdx = chunk.content.indexOf("\n---", 4);
|
|
56
|
+
if (endIdx !== -1) {
|
|
57
|
+
chunk.content = chunk.content.slice(endIdx + 4).trimStart();
|
|
58
|
+
return chunk.content.length > 0;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
return true;
|
|
62
|
+
});
|
|
63
|
+
const headings = chunks.reduce((set, chunk) => {
|
|
64
|
+
Object.entries(chunk.metadata?.headers || {}).forEach(([k, v]) => {
|
|
65
|
+
if (!set[k])
|
|
66
|
+
set[k] = [];
|
|
67
|
+
if (v && !set[k].includes(v))
|
|
68
|
+
set[k].push(v);
|
|
69
|
+
});
|
|
70
|
+
return set;
|
|
71
|
+
}, {});
|
|
48
72
|
return {
|
|
73
|
+
markdown,
|
|
49
74
|
chunks,
|
|
50
75
|
title,
|
|
51
76
|
description,
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Object.entries(m.metadata?.headers || {}).forEach(([k, v]) => {
|
|
55
|
-
if (!set[k])
|
|
56
|
-
set[k] = [];
|
|
57
|
-
if (v && !set[k].includes(v))
|
|
58
|
-
set[k].push(v);
|
|
59
|
-
});
|
|
60
|
-
return set;
|
|
61
|
-
}, {})
|
|
77
|
+
headings,
|
|
78
|
+
...updatedAt && { updatedAt }
|
|
62
79
|
};
|
|
63
80
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nuxt-ai-ready",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.3.2",
|
|
4
|
+
"version": "0.3.3",
|
|
5
5
|
"description": "Best practice AI & LLM discoverability for Nuxt sites.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -59,7 +59,7 @@
|
|
|
59
59
|
"@nuxtjs/i18n": "^10.2.1",
|
|
60
60
|
"@nuxtjs/mcp-toolkit": "^0.5.1",
|
|
61
61
|
"@nuxtjs/robots": "^5.6.1",
|
|
62
|
-
"@nuxtjs/sitemap": "^7.4.
|
|
62
|
+
"@nuxtjs/sitemap": "^7.4.9",
|
|
63
63
|
"@vitest/coverage-v8": "^4.0.15",
|
|
64
64
|
"@vueuse/nuxt": "^14.1.0",
|
|
65
65
|
"better-sqlite3": "^12.5.0",
|
|
@@ -76,7 +76,7 @@
|
|
|
76
76
|
"vitest": "^4.0.15",
|
|
77
77
|
"vue": "^3.5.25",
|
|
78
78
|
"vue-router": "^4.6.3",
|
|
79
|
-
"vue-tsc": "^3.1.
|
|
79
|
+
"vue-tsc": "^3.1.7",
|
|
80
80
|
"zod": "^4.1.13"
|
|
81
81
|
},
|
|
82
82
|
"resolutions": {
|