nuxt-ai-ready 0.3.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -14
- package/dist/module.d.mts +13 -20
- package/dist/module.json +1 -1
- package/dist/module.mjs +384 -380
- package/dist/runtime/{llms-txt.d.ts → llms-txt-utils.d.ts} +2 -0
- package/dist/runtime/llms-txt-utils.js +114 -0
- package/dist/runtime/nuxt/plugins/{prerender.js → md-hints.prerender.js} +1 -1
- package/dist/runtime/server/mcp/resources/pages.d.ts +17 -0
- package/dist/runtime/server/mcp/{dev/resources → resources}/pages.js +5 -4
- package/dist/runtime/server/mcp/tools/list-pages.d.ts +16 -0
- package/dist/runtime/server/mcp/tools/list-pages.js +11 -0
- package/dist/runtime/server/mcp/tools/search-pages-fuzzy.d.ts +3 -0
- package/dist/runtime/server/mcp/tools/search-pages-fuzzy.js +25 -0
- package/dist/runtime/server/middleware/markdown.js +62 -0
- package/dist/runtime/server/middleware/markdown.prerender.d.ts +2 -0
- package/dist/runtime/server/middleware/markdown.prerender.js +35 -0
- package/dist/runtime/server/routes/llms-full.txt.get.d.ts +2 -0
- package/dist/runtime/server/routes/llms-full.txt.get.js +5 -0
- package/dist/runtime/server/routes/llms.txt.get.js +18 -18
- package/dist/runtime/server/utils/pageData.d.ts +25 -0
- package/dist/runtime/server/utils/pageData.js +25 -0
- package/dist/runtime/server/utils/sitemap.d.ts +6 -0
- package/dist/runtime/server/utils/sitemap.js +25 -0
- package/dist/runtime/server/utils.d.ts +15 -3
- package/dist/runtime/server/utils.js +93 -45
- package/dist/runtime/types.d.ts +25 -52
- package/dist/types.d.mts +1 -1
- package/package.json +16 -15
- package/dist/runtime/llms-txt.js +0 -35
- package/dist/runtime/server/mcp/dev/tools/list-pages.js +0 -10
- package/dist/runtime/server/mcp/dev/utils.js +0 -34
- package/dist/runtime/server/mcp/prod/resources/pages-chunks.js +0 -25
- package/dist/runtime/server/mcp/prod/resources/pages.js +0 -25
- package/dist/runtime/server/mcp/prod/tools/list-pages.js +0 -21
- package/dist/runtime/server/mcp/utils.d.ts +0 -3
- package/dist/runtime/server/mcp/utils.js +0 -7
- package/dist/runtime/server/middleware/mdream.js +0 -148
- package/dist/runtime/server/plugins/sitemap-lastmod.d.ts +0 -2
- package/dist/runtime/server/plugins/sitemap-lastmod.js +0 -22
- /package/dist/runtime/nuxt/plugins/{prerender.d.ts → md-hints.prerender.d.ts} +0 -0
- /package/dist/runtime/server/middleware/{mdream.d.ts → markdown.d.ts} +0 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
import type { H3Event } from 'h3';
|
|
1
2
|
import type { LlmsTxtConfig } from './types.js';
|
|
2
3
|
/**
|
|
3
4
|
* Normalize llms.txt structured configuration to markdown string
|
|
4
5
|
*/
|
|
5
6
|
export declare function normalizeLlmsTxtConfig(config: LlmsTxtConfig): string;
|
|
7
|
+
export declare function buildLlmsTxt(event: H3Event): Promise<string>;
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { getSiteConfig } from "#site-config/server/composables/getSiteConfig";
|
|
2
|
+
import { useRuntimeConfig } from "nitropack/runtime";
|
|
3
|
+
import { getPages } from "./server/utils/pageData.js";
|
|
4
|
+
import { fetchSitemapUrls } from "./server/utils/sitemap.js";
|
|
5
|
+
function normalizeLink(link) {
|
|
6
|
+
const parts = [];
|
|
7
|
+
parts.push(`- [${link.title}](${link.href})`);
|
|
8
|
+
if (link.description) {
|
|
9
|
+
parts.push(` ${link.description}`);
|
|
10
|
+
}
|
|
11
|
+
return parts.join("\n");
|
|
12
|
+
}
|
|
13
|
+
function normalizeSection(section) {
|
|
14
|
+
const parts = [];
|
|
15
|
+
parts.push(`## ${section.title}`);
|
|
16
|
+
parts.push("");
|
|
17
|
+
if (section.description) {
|
|
18
|
+
const descriptions = Array.isArray(section.description) ? section.description : [section.description];
|
|
19
|
+
parts.push(...descriptions);
|
|
20
|
+
parts.push("");
|
|
21
|
+
}
|
|
22
|
+
if (section.links?.length) {
|
|
23
|
+
parts.push(...section.links.map(normalizeLink));
|
|
24
|
+
}
|
|
25
|
+
return parts.join("\n");
|
|
26
|
+
}
|
|
27
|
+
export function normalizeLlmsTxtConfig(config) {
|
|
28
|
+
const parts = [];
|
|
29
|
+
if (config.sections?.length) {
|
|
30
|
+
parts.push(...config.sections.map(normalizeSection));
|
|
31
|
+
}
|
|
32
|
+
if (config.notes) {
|
|
33
|
+
parts.push("## Notes");
|
|
34
|
+
parts.push("");
|
|
35
|
+
const notes = Array.isArray(config.notes) ? config.notes : [config.notes];
|
|
36
|
+
parts.push(...notes);
|
|
37
|
+
}
|
|
38
|
+
return parts.join("\n\n");
|
|
39
|
+
}
|
|
40
|
+
export async function buildLlmsTxt(event) {
|
|
41
|
+
const runtimeConfig = useRuntimeConfig(event);
|
|
42
|
+
const aiReadyConfig = runtimeConfig["nuxt-ai-ready"];
|
|
43
|
+
const sitemapConfig = runtimeConfig.sitemap;
|
|
44
|
+
const siteConfig = getSiteConfig(event);
|
|
45
|
+
const llmsTxtConfig = aiReadyConfig.llmsTxt;
|
|
46
|
+
const parts = [];
|
|
47
|
+
parts.push(`# ${siteConfig.name || siteConfig.url}`);
|
|
48
|
+
if (siteConfig.description) {
|
|
49
|
+
parts.push(`
|
|
50
|
+
> ${siteConfig.description}`);
|
|
51
|
+
}
|
|
52
|
+
if (siteConfig.url) {
|
|
53
|
+
parts.push(`
|
|
54
|
+
Canonical Origin: ${siteConfig.url}`);
|
|
55
|
+
}
|
|
56
|
+
parts.push("");
|
|
57
|
+
const sections = llmsTxtConfig.sections ? [...llmsTxtConfig.sections] : [];
|
|
58
|
+
if (sections[0]?.links) {
|
|
59
|
+
if (sitemapConfig?.sitemaps) {
|
|
60
|
+
const sitemapRoutes = Object.values(sitemapConfig.sitemaps).map((s) => s.sitemapName);
|
|
61
|
+
for (const name of sitemapRoutes) {
|
|
62
|
+
sections[0].links.push({ title: name, href: `/${name}`, description: "XML sitemap for search engines and crawlers." });
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
sections[0].links.push({ title: "robots.txt", href: "/robots.txt", description: "Crawler rules and permissions." });
|
|
66
|
+
}
|
|
67
|
+
const normalizedContent = normalizeLlmsTxtConfig({ ...llmsTxtConfig, sections });
|
|
68
|
+
if (normalizedContent) {
|
|
69
|
+
parts.push(normalizedContent);
|
|
70
|
+
parts.push("");
|
|
71
|
+
}
|
|
72
|
+
const pages = await getPages();
|
|
73
|
+
const urls = await fetchSitemapUrls(event);
|
|
74
|
+
const devModeHint = import.meta.dev && pages.size === 0 ? " (dev mode - run `nuxi generate` for page titles)" : "";
|
|
75
|
+
const prerendered = [];
|
|
76
|
+
for (const [pathname, page] of pages) {
|
|
77
|
+
prerendered.push({ pathname, title: page.title });
|
|
78
|
+
}
|
|
79
|
+
const other = [];
|
|
80
|
+
for (const url of urls) {
|
|
81
|
+
const pathname = url.loc.startsWith("http") ? new URL(url.loc).pathname : url.loc;
|
|
82
|
+
if (!pages.has(pathname)) {
|
|
83
|
+
other.push(pathname);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
if (prerendered.length > 0 && other.length > 0) {
|
|
87
|
+
parts.push(`## Prerendered Pages${devModeHint}
|
|
88
|
+
`);
|
|
89
|
+
for (const { pathname, title } of prerendered) {
|
|
90
|
+
parts.push(title && title !== pathname ? `- [${title}](${pathname})` : `- ${pathname}`);
|
|
91
|
+
}
|
|
92
|
+
parts.push("");
|
|
93
|
+
parts.push("## Other Pages\n");
|
|
94
|
+
for (const pathname of other) {
|
|
95
|
+
parts.push(`- ${pathname}`);
|
|
96
|
+
}
|
|
97
|
+
parts.push("");
|
|
98
|
+
} else if (prerendered.length > 0) {
|
|
99
|
+
parts.push(`## Pages${devModeHint}
|
|
100
|
+
`);
|
|
101
|
+
for (const { pathname, title } of prerendered) {
|
|
102
|
+
parts.push(title && title !== pathname ? `- [${title}](${pathname})` : `- ${pathname}`);
|
|
103
|
+
}
|
|
104
|
+
parts.push("");
|
|
105
|
+
} else if (other.length > 0) {
|
|
106
|
+
parts.push(`## Pages${devModeHint}
|
|
107
|
+
`);
|
|
108
|
+
for (const pathname of other) {
|
|
109
|
+
parts.push(`- ${pathname}`);
|
|
110
|
+
}
|
|
111
|
+
parts.push("");
|
|
112
|
+
}
|
|
113
|
+
return parts.join("\n");
|
|
114
|
+
}
|
|
@@ -7,7 +7,7 @@ export default defineNuxtPlugin({
|
|
|
7
7
|
}
|
|
8
8
|
nuxtApp.hooks.hook("app:rendered", (ctx) => {
|
|
9
9
|
let url = ctx.ssrContext?.url || "";
|
|
10
|
-
if (isPathFile(url)) {
|
|
10
|
+
if (isPathFile(url) || ctx.ssrContext?.error || ctx.ssrContext?.noSSR) {
|
|
11
11
|
return;
|
|
12
12
|
}
|
|
13
13
|
if (url.endsWith("/")) {
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
declare const _default: {
|
|
2
|
+
uri: string;
|
|
3
|
+
name: string;
|
|
4
|
+
description: string;
|
|
5
|
+
metadata: {
|
|
6
|
+
mimeType: string;
|
|
7
|
+
};
|
|
8
|
+
cache: "1h";
|
|
9
|
+
handler(uri: URL): Promise<{
|
|
10
|
+
contents: {
|
|
11
|
+
uri: string;
|
|
12
|
+
mimeType: string;
|
|
13
|
+
text: string;
|
|
14
|
+
}[];
|
|
15
|
+
}>;
|
|
16
|
+
};
|
|
17
|
+
export default _default;
|
|
@@ -1,18 +1,19 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getPagesList } from "../../utils/pageData.js";
|
|
2
2
|
export default {
|
|
3
3
|
uri: "resource://nuxt-ai-ready/pages",
|
|
4
4
|
name: "All Pages",
|
|
5
|
-
description: "Page
|
|
5
|
+
description: "Page listing as JSON.",
|
|
6
6
|
metadata: {
|
|
7
7
|
mimeType: "application/json"
|
|
8
8
|
},
|
|
9
|
+
cache: "1h",
|
|
9
10
|
async handler(uri) {
|
|
10
|
-
const pages = await
|
|
11
|
+
const pages = await getPagesList();
|
|
11
12
|
return {
|
|
12
13
|
contents: [{
|
|
13
14
|
uri: uri.toString(),
|
|
14
15
|
mimeType: "application/json",
|
|
15
|
-
text: JSON.stringify(pages
|
|
16
|
+
text: JSON.stringify(pages)
|
|
16
17
|
}]
|
|
17
18
|
};
|
|
18
19
|
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lists all pages with metadata
|
|
3
|
+
*/
|
|
4
|
+
declare const _default: {
|
|
5
|
+
name: string;
|
|
6
|
+
description: string;
|
|
7
|
+
inputSchema: {};
|
|
8
|
+
cache: "1h";
|
|
9
|
+
handler(): Promise<{
|
|
10
|
+
content: {
|
|
11
|
+
type: "text";
|
|
12
|
+
text: string;
|
|
13
|
+
}[];
|
|
14
|
+
}>;
|
|
15
|
+
};
|
|
16
|
+
export default _default;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { getPagesList } from "../../utils/pageData.js";
|
|
2
|
+
export default {
|
|
3
|
+
name: "list_pages",
|
|
4
|
+
description: "Lists all available pages with titles, descriptions and routes.",
|
|
5
|
+
inputSchema: {},
|
|
6
|
+
cache: "1h",
|
|
7
|
+
async handler() {
|
|
8
|
+
const pages = await getPagesList();
|
|
9
|
+
return { content: [{ type: "text", text: JSON.stringify(pages) }] };
|
|
10
|
+
}
|
|
11
|
+
};
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import Fuse from "fuse.js";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { getPagesList } from "../../utils/pageData.js";
|
|
4
|
+
const inputSchema = {
|
|
5
|
+
query: z.string().describe("Search query"),
|
|
6
|
+
limit: z.number().optional().default(10).describe("Max results")
|
|
7
|
+
};
|
|
8
|
+
const tool = {
|
|
9
|
+
name: "search_pages_fuzzy",
|
|
10
|
+
description: "Fuzzy search pages by title, description, route.",
|
|
11
|
+
inputSchema,
|
|
12
|
+
cache: "5m",
|
|
13
|
+
async handler({ query, limit }) {
|
|
14
|
+
const pages = await getPagesList();
|
|
15
|
+
const fuse = new Fuse(pages, {
|
|
16
|
+
keys: ["title", "description", "route"],
|
|
17
|
+
threshold: 0.4,
|
|
18
|
+
includeScore: true
|
|
19
|
+
});
|
|
20
|
+
const results = fuse.search(query, { limit });
|
|
21
|
+
const items = results.map((r) => ({ ...r.item, score: r.score ?? 0 }));
|
|
22
|
+
return { content: [{ type: "text", text: JSON.stringify(items) }] };
|
|
23
|
+
}
|
|
24
|
+
};
|
|
25
|
+
export default tool;
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { withSiteUrl } from "#site-config/server/composables/utils";
|
|
2
|
+
import { createError, defineEventHandler, setHeader } from "h3";
|
|
3
|
+
import { useRuntimeConfig } from "nitropack/runtime";
|
|
4
|
+
import { logger } from "../logger.js";
|
|
5
|
+
import { convertHtmlToMarkdown, getMarkdownRenderInfo } from "../utils.js";
|
|
6
|
+
export default defineEventHandler(async (event) => {
|
|
7
|
+
const renderInfo = getMarkdownRenderInfo(event);
|
|
8
|
+
if (!renderInfo)
|
|
9
|
+
return;
|
|
10
|
+
const { path, isExplicit } = renderInfo;
|
|
11
|
+
const config = useRuntimeConfig(event)["nuxt-ai-ready"];
|
|
12
|
+
const response = await event.fetch(path).catch((e) => {
|
|
13
|
+
logger.error(`Failed to fetch HTML for ${path}`, e);
|
|
14
|
+
return null;
|
|
15
|
+
});
|
|
16
|
+
if (!response) {
|
|
17
|
+
if (isExplicit) {
|
|
18
|
+
return createError({
|
|
19
|
+
statusCode: 500,
|
|
20
|
+
statusMessage: "Internal Server Error",
|
|
21
|
+
message: `Failed to fetch HTML for ${path}`
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
return;
|
|
25
|
+
}
|
|
26
|
+
if (!response.ok) {
|
|
27
|
+
if (isExplicit) {
|
|
28
|
+
return createError({
|
|
29
|
+
statusCode: response.status,
|
|
30
|
+
statusMessage: response.statusText,
|
|
31
|
+
message: `Failed to fetch HTML for ${path}`
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
const contentType = response.headers.get("content-type") || "";
|
|
37
|
+
if (!contentType.includes("text/html")) {
|
|
38
|
+
if (isExplicit) {
|
|
39
|
+
return createError({
|
|
40
|
+
statusCode: 415,
|
|
41
|
+
statusMessage: "Unsupported Media Type",
|
|
42
|
+
message: `Expected text/html but got ${contentType} for ${path}`
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
47
|
+
const html = await response.text();
|
|
48
|
+
const result = await convertHtmlToMarkdown(
|
|
49
|
+
html,
|
|
50
|
+
withSiteUrl(event, path),
|
|
51
|
+
config,
|
|
52
|
+
path,
|
|
53
|
+
event
|
|
54
|
+
);
|
|
55
|
+
setHeader(event, "content-type", "text/markdown; charset=utf-8");
|
|
56
|
+
if (config.markdownCacheHeaders) {
|
|
57
|
+
const { maxAge, swr } = config.markdownCacheHeaders;
|
|
58
|
+
const cacheControl = swr ? `public, max-age=${maxAge}, stale-while-revalidate=${maxAge}` : `public, max-age=${maxAge}`;
|
|
59
|
+
setHeader(event, "cache-control", cacheControl);
|
|
60
|
+
}
|
|
61
|
+
return result.markdown;
|
|
62
|
+
});
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { withSiteUrl } from "#site-config/server/composables/utils";
|
|
2
|
+
import { createError, defineEventHandler } from "h3";
|
|
3
|
+
import { useRuntimeConfig } from "nitropack/runtime";
|
|
4
|
+
import { convertHtmlToMarkdownMeta, getMarkdownRenderInfo } from "../utils.js";
|
|
5
|
+
export default defineEventHandler(async (event) => {
|
|
6
|
+
if (!import.meta.prerender) {
|
|
7
|
+
return;
|
|
8
|
+
}
|
|
9
|
+
const renderInfo = getMarkdownRenderInfo(event, true);
|
|
10
|
+
if (!renderInfo)
|
|
11
|
+
return;
|
|
12
|
+
const { path } = renderInfo;
|
|
13
|
+
const runtimeConfig = useRuntimeConfig(event)["nuxt-ai-ready"];
|
|
14
|
+
const response = await event.fetch(path);
|
|
15
|
+
if (!response.ok) {
|
|
16
|
+
return createError({
|
|
17
|
+
statusCode: response.status,
|
|
18
|
+
statusMessage: response.statusText,
|
|
19
|
+
message: `Failed to fetch HTML for ${path}`
|
|
20
|
+
});
|
|
21
|
+
}
|
|
22
|
+
const html = await response.text();
|
|
23
|
+
if (html.includes("__NUXT_ERROR__") || html.includes("nuxt-error-page")) {
|
|
24
|
+
return createError({
|
|
25
|
+
statusCode: 404,
|
|
26
|
+
message: `Page rendered as error: ${path}`
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
const result = convertHtmlToMarkdownMeta(
|
|
30
|
+
html,
|
|
31
|
+
withSiteUrl(event, path),
|
|
32
|
+
runtimeConfig.mdreamOptions
|
|
33
|
+
);
|
|
34
|
+
return JSON.stringify(result);
|
|
35
|
+
});
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { eventHandler, setHeader } from "h3";
|
|
2
|
+
export default eventHandler((event) => {
|
|
3
|
+
setHeader(event, "Content-Type", "text/plain; charset=utf-8");
|
|
4
|
+
return "# llms-full.txt\n\nThis file is only available for prerendered routes.\nRun `nuxi generate` to generate this file.";
|
|
5
|
+
});
|
|
@@ -1,23 +1,23 @@
|
|
|
1
|
-
import { getSiteConfig } from "#site-config/server/composables/getSiteConfig";
|
|
2
1
|
import { eventHandler, setHeader } from "h3";
|
|
3
|
-
import { useRuntimeConfig } from "nitropack/runtime";
|
|
4
|
-
import {
|
|
2
|
+
import { defineCachedFunction, useRuntimeConfig } from "nitropack/runtime";
|
|
3
|
+
import { buildLlmsTxt } from "../../llms-txt-utils.js";
|
|
4
|
+
const buildLlmsTxtCached = defineCachedFunction(
|
|
5
|
+
buildLlmsTxt,
|
|
6
|
+
{
|
|
7
|
+
name: "llms-txt",
|
|
8
|
+
group: "ai-ready",
|
|
9
|
+
maxAge: 60 * 10,
|
|
10
|
+
// 10 minutes
|
|
11
|
+
swr: true
|
|
12
|
+
}
|
|
13
|
+
);
|
|
5
14
|
export default eventHandler(async (event) => {
|
|
6
15
|
const runtimeConfig = useRuntimeConfig(event)["nuxt-ai-ready"];
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const parts = [];
|
|
10
|
-
parts.push(`# ${siteConfig.name || siteConfig.url}`);
|
|
11
|
-
if (siteConfig.description) {
|
|
12
|
-
parts.push(`
|
|
13
|
-
> ${siteConfig.description}
|
|
14
|
-
`);
|
|
15
|
-
}
|
|
16
|
-
parts.push("<!-- Pages will be generated at build time -->\n");
|
|
17
|
-
const normalizedContent = normalizeLlmsTxtConfig(llmsTxtConfig);
|
|
18
|
-
if (normalizedContent) {
|
|
19
|
-
parts.push(normalizedContent);
|
|
20
|
-
}
|
|
16
|
+
const cacheEnabled = !import.meta.dev && runtimeConfig.cacheMaxAgeSeconds > 0;
|
|
17
|
+
const content = cacheEnabled ? await buildLlmsTxtCached(event) : await buildLlmsTxt(event);
|
|
21
18
|
setHeader(event, "Content-Type", "text/plain; charset=utf-8");
|
|
22
|
-
|
|
19
|
+
if (cacheEnabled) {
|
|
20
|
+
setHeader(event, "Cache-Control", `public, max-age=${runtimeConfig.cacheMaxAgeSeconds}, s-maxage=${runtimeConfig.cacheMaxAgeSeconds}, stale-while-revalidate=3600`);
|
|
21
|
+
}
|
|
22
|
+
return content;
|
|
23
23
|
});
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/** Page entry from virtual module */
|
|
2
|
+
export interface PageEntry {
|
|
3
|
+
route: string;
|
|
4
|
+
title: string;
|
|
5
|
+
description: string;
|
|
6
|
+
headings: string;
|
|
7
|
+
updatedAt: string;
|
|
8
|
+
}
|
|
9
|
+
/** Page data from JSONL (includes markdown for llms-full.txt) */
|
|
10
|
+
export interface PageData extends PageEntry {
|
|
11
|
+
markdown: string;
|
|
12
|
+
}
|
|
13
|
+
/** Read page data - returns page data indexed by route */
|
|
14
|
+
export declare function getPages(): Promise<Map<string, PageEntry>>;
|
|
15
|
+
/** Get all page data including markdown (prerender only) */
|
|
16
|
+
export declare function readPrerenderedPageData(): Promise<Map<string, PageData>>;
|
|
17
|
+
/** Page list item for MCP tools/resources */
|
|
18
|
+
export interface PageListItem {
|
|
19
|
+
route: string;
|
|
20
|
+
title: string;
|
|
21
|
+
description: string;
|
|
22
|
+
headings?: string;
|
|
23
|
+
}
|
|
24
|
+
/** Get pages as flat list for MCP consumption */
|
|
25
|
+
export declare function getPagesList(): Promise<PageListItem[]>;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
export async function getPages() {
|
|
2
|
+
if (import.meta.dev)
|
|
3
|
+
return /* @__PURE__ */ new Map();
|
|
4
|
+
if (import.meta.prerender) {
|
|
5
|
+
return readPrerenderedPageData();
|
|
6
|
+
}
|
|
7
|
+
const m = await import("#ai-ready-virtual/page-data.mjs");
|
|
8
|
+
return m.pages?.length ? new Map(m.pages.map((p) => [p.route, p])) : /* @__PURE__ */ new Map();
|
|
9
|
+
}
|
|
10
|
+
export async function readPrerenderedPageData() {
|
|
11
|
+
if (!import.meta.prerender)
|
|
12
|
+
return /* @__PURE__ */ new Map();
|
|
13
|
+
const { readPageDataFromFilesystem } = await import("#ai-ready-virtual/read-page-data.mjs");
|
|
14
|
+
const pages = await readPageDataFromFilesystem();
|
|
15
|
+
return pages?.length ? new Map(pages.map((p) => [p.route, p])) : /* @__PURE__ */ new Map();
|
|
16
|
+
}
|
|
17
|
+
export async function getPagesList() {
|
|
18
|
+
const pages = await getPages();
|
|
19
|
+
return Array.from(pages.values()).map((p) => ({
|
|
20
|
+
route: p.route,
|
|
21
|
+
title: p.title || p.route,
|
|
22
|
+
description: p.description || "",
|
|
23
|
+
headings: p.headings || void 0
|
|
24
|
+
}));
|
|
25
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { parseSitemapXml } from "@nuxtjs/sitemap/utils";
|
|
2
|
+
import { logger } from "../logger.js";
|
|
3
|
+
export async function fetchSitemapUrls(event) {
|
|
4
|
+
const sitemapRes = await event.$fetch("/sitemap.xml", { responseType: "text" }).catch(() => null);
|
|
5
|
+
if (!sitemapRes) {
|
|
6
|
+
logger.warn("Sitemap not found at /sitemap.xml - llms.txt will have no pages listed");
|
|
7
|
+
return [];
|
|
8
|
+
}
|
|
9
|
+
const result = await parseSitemapXml(sitemapRes).catch((e) => {
|
|
10
|
+
logger.warn("Failed to parse sitemap.xml:", e);
|
|
11
|
+
return { urls: [] };
|
|
12
|
+
});
|
|
13
|
+
const urls = result?.urls || [];
|
|
14
|
+
if (urls.length === 0) {
|
|
15
|
+
logger.warn("Sitemap is empty - llms.txt will have no pages listed");
|
|
16
|
+
}
|
|
17
|
+
return urls.map((entry) => {
|
|
18
|
+
if (typeof entry === "string")
|
|
19
|
+
return { loc: entry };
|
|
20
|
+
return {
|
|
21
|
+
loc: entry.loc,
|
|
22
|
+
lastmod: entry.lastmod instanceof Date ? entry.lastmod.toISOString() : entry.lastmod
|
|
23
|
+
};
|
|
24
|
+
});
|
|
25
|
+
}
|
|
@@ -1,9 +1,21 @@
|
|
|
1
|
+
import type { H3Event } from 'h3';
|
|
1
2
|
import type { ModulePublicRuntimeConfig } from '../../module.js';
|
|
2
|
-
export declare function
|
|
3
|
+
export declare function normalizeWhitespace(text: string): string;
|
|
4
|
+
export declare function getMarkdownRenderInfo(event: H3Event, explicitOnly?: boolean): {
|
|
5
|
+
path: string;
|
|
6
|
+
isExplicit: boolean;
|
|
7
|
+
} | null;
|
|
8
|
+
export declare function clientPrefersMarkdown(event: H3Event): boolean;
|
|
9
|
+
export declare function convertHtmlToMarkdown(html: string, url: string, config: ModulePublicRuntimeConfig, route: string, event: H3Event): Promise<{
|
|
10
|
+
markdown: string;
|
|
11
|
+
title: string;
|
|
12
|
+
description: string;
|
|
13
|
+
headings: Record<string, string>[];
|
|
14
|
+
}>;
|
|
15
|
+
export declare function convertHtmlToMarkdownMeta(html: string, url: string, mdreamOptions: ModulePublicRuntimeConfig['mdreamOptions']): {
|
|
3
16
|
updatedAt?: string | undefined;
|
|
4
17
|
markdown: string;
|
|
5
|
-
chunks: import("mdream").MarkdownChunk[];
|
|
6
18
|
title: string;
|
|
7
19
|
description: string;
|
|
8
|
-
headings: Record<string, string[]
|
|
20
|
+
headings: Record<string, string>[];
|
|
9
21
|
};
|
|
@@ -1,23 +1,96 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getHeader } from "h3";
|
|
2
|
+
import { htmlToMarkdown } from "mdream";
|
|
2
3
|
import { extractionPlugin } from "mdream/plugins";
|
|
3
4
|
import { withMinimalPreset } from "mdream/preset/minimal";
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
function stripFrontmatter(text) {
|
|
7
|
-
if (!text.startsWith("---\n"))
|
|
8
|
-
return text;
|
|
9
|
-
const endIdx = text.indexOf("\n---", 4);
|
|
10
|
-
if (endIdx === -1)
|
|
11
|
-
return text;
|
|
12
|
-
return text.slice(endIdx + 4).trimStart();
|
|
13
|
-
}
|
|
14
|
-
function normalizeWhitespace(text) {
|
|
5
|
+
import { useNitroApp } from "nitropack/runtime";
|
|
6
|
+
export function normalizeWhitespace(text) {
|
|
15
7
|
return text.replace(/\u00A0/g, " ");
|
|
16
8
|
}
|
|
17
|
-
export function
|
|
9
|
+
export function getMarkdownRenderInfo(event, explicitOnly = false) {
|
|
10
|
+
const originalPath = event.path;
|
|
11
|
+
if (originalPath.startsWith("/api") || originalPath.startsWith("/_") || originalPath.startsWith("/@")) {
|
|
12
|
+
return null;
|
|
13
|
+
}
|
|
14
|
+
const isExplicit = originalPath.endsWith(".md");
|
|
15
|
+
if (explicitOnly && !isExplicit) {
|
|
16
|
+
return null;
|
|
17
|
+
}
|
|
18
|
+
const lastSegment = originalPath.split("/").pop() || "";
|
|
19
|
+
const hasExtension = lastSegment.includes(".");
|
|
20
|
+
const extension = hasExtension ? lastSegment.substring(lastSegment.lastIndexOf(".")) : "";
|
|
21
|
+
if (hasExtension && extension !== ".md") {
|
|
22
|
+
return null;
|
|
23
|
+
}
|
|
24
|
+
const isImplicit = !explicitOnly && clientPrefersMarkdown(event);
|
|
25
|
+
if (!isExplicit && !isImplicit) {
|
|
26
|
+
return null;
|
|
27
|
+
}
|
|
28
|
+
let path = isExplicit ? originalPath.slice(0, -3) : originalPath;
|
|
29
|
+
if (path === "/index") {
|
|
30
|
+
path = "/";
|
|
31
|
+
}
|
|
32
|
+
return { path, isExplicit };
|
|
33
|
+
}
|
|
34
|
+
export function clientPrefersMarkdown(event) {
|
|
35
|
+
const accept = getHeader(event, "accept") || "";
|
|
36
|
+
const secFetchDest = getHeader(event, "sec-fetch-dest") || "";
|
|
37
|
+
if (secFetchDest === "document") {
|
|
38
|
+
return false;
|
|
39
|
+
}
|
|
40
|
+
if (accept.includes("text/html")) {
|
|
41
|
+
return false;
|
|
42
|
+
}
|
|
43
|
+
return accept.includes("*/*") || accept.includes("text/markdown");
|
|
44
|
+
}
|
|
45
|
+
export async function convertHtmlToMarkdown(html, url, config, route, event) {
|
|
46
|
+
const nitroApp = useNitroApp();
|
|
47
|
+
let title = "";
|
|
48
|
+
let description = "";
|
|
49
|
+
const headings = [];
|
|
50
|
+
const extractPlugin = extractionPlugin({
|
|
51
|
+
title(el) {
|
|
52
|
+
title = el.textContent;
|
|
53
|
+
},
|
|
54
|
+
'meta[name="description"]': (el) => {
|
|
55
|
+
description = el.attributes.content || "";
|
|
56
|
+
},
|
|
57
|
+
"h1, h2, h3, h4, h5, h6": (el) => {
|
|
58
|
+
const text = el.textContent?.trim();
|
|
59
|
+
const level = el.name.toLowerCase();
|
|
60
|
+
if (text)
|
|
61
|
+
headings.push({ [level]: text });
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
let options = {
|
|
65
|
+
origin: url,
|
|
66
|
+
...config.mdreamOptions
|
|
67
|
+
};
|
|
68
|
+
if (config.mdreamOptions?.preset === "minimal") {
|
|
69
|
+
options = withMinimalPreset(options);
|
|
70
|
+
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
71
|
+
} else {
|
|
72
|
+
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
73
|
+
}
|
|
74
|
+
await nitroApp.hooks.callHook("ai-ready:mdreamConfig", options);
|
|
75
|
+
let markdown = htmlToMarkdown(html, options);
|
|
76
|
+
const context = {
|
|
77
|
+
html,
|
|
78
|
+
markdown,
|
|
79
|
+
route,
|
|
80
|
+
title,
|
|
81
|
+
description,
|
|
82
|
+
isPrerender: false,
|
|
83
|
+
event
|
|
84
|
+
};
|
|
85
|
+
await nitroApp.hooks.callHook("ai-ready:markdown", context);
|
|
86
|
+
markdown = normalizeWhitespace(context.markdown);
|
|
87
|
+
return { markdown, title: normalizeWhitespace(title), description: normalizeWhitespace(description), headings };
|
|
88
|
+
}
|
|
89
|
+
export function convertHtmlToMarkdownMeta(html, url, mdreamOptions) {
|
|
18
90
|
let title = "";
|
|
19
91
|
let description = "";
|
|
20
92
|
let updatedAt;
|
|
93
|
+
const headings = [];
|
|
21
94
|
const extractPlugin = extractionPlugin({
|
|
22
95
|
title(el) {
|
|
23
96
|
title = el.textContent;
|
|
@@ -29,6 +102,12 @@ export function convertHtmlToMarkdownChunks(html, url, mdreamOptions) {
|
|
|
29
102
|
if (!updatedAt && el.attributes.content) {
|
|
30
103
|
updatedAt = el.attributes.content;
|
|
31
104
|
}
|
|
105
|
+
},
|
|
106
|
+
"h1, h2, h3, h4, h5, h6": (el) => {
|
|
107
|
+
const text = el.textContent?.trim();
|
|
108
|
+
const level = el.name.toLowerCase();
|
|
109
|
+
if (text)
|
|
110
|
+
headings.push({ [level]: text });
|
|
32
111
|
}
|
|
33
112
|
});
|
|
34
113
|
let options = {
|
|
@@ -42,40 +121,9 @@ export function convertHtmlToMarkdownChunks(html, url, mdreamOptions) {
|
|
|
42
121
|
options.plugins = [extractPlugin, ...options.plugins || []];
|
|
43
122
|
}
|
|
44
123
|
const rawMarkdown = htmlToMarkdown(html, options);
|
|
45
|
-
const markdown = normalizeWhitespace(
|
|
46
|
-
const rawChunks = htmlToMarkdownSplitChunks(html, {
|
|
47
|
-
...options,
|
|
48
|
-
headersToSplitOn: [TagIdMap.h1, TagIdMap.h2, TagIdMap.h3],
|
|
49
|
-
origin: url,
|
|
50
|
-
chunkSize: 512,
|
|
51
|
-
stripHeaders: false,
|
|
52
|
-
lengthFunction(text) {
|
|
53
|
-
return estimateTokenCount(text);
|
|
54
|
-
}
|
|
55
|
-
});
|
|
56
|
-
const chunks = rawChunks.filter((chunk, idx) => {
|
|
57
|
-
chunk.content = normalizeWhitespace(chunk.content);
|
|
58
|
-
if (idx === 0 && chunk.content.startsWith("---\n")) {
|
|
59
|
-
const endIdx = chunk.content.indexOf("\n---", 4);
|
|
60
|
-
if (endIdx !== -1) {
|
|
61
|
-
chunk.content = chunk.content.slice(endIdx + 4).trimStart();
|
|
62
|
-
return chunk.content.length > 0;
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
return true;
|
|
66
|
-
});
|
|
67
|
-
const headings = chunks.reduce((set, chunk) => {
|
|
68
|
-
Object.entries(chunk.metadata?.headers || {}).forEach(([k, v]) => {
|
|
69
|
-
if (!set[k])
|
|
70
|
-
set[k] = [];
|
|
71
|
-
if (v && !set[k].includes(v))
|
|
72
|
-
set[k].push(v);
|
|
73
|
-
});
|
|
74
|
-
return set;
|
|
75
|
-
}, {});
|
|
124
|
+
const markdown = normalizeWhitespace(rawMarkdown);
|
|
76
125
|
return {
|
|
77
126
|
markdown,
|
|
78
|
-
chunks,
|
|
79
127
|
title: normalizeWhitespace(title),
|
|
80
128
|
description: normalizeWhitespace(description),
|
|
81
129
|
headings,
|