@happyalienai/vite-plugin-llm-spider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,423 @@
1
+ // src/index.js
2
+ import { preview } from "vite";
3
+ import fs from "fs/promises";
4
+ import path from "path";
5
+ import * as cheerio from "cheerio";
6
+ import TurndownService from "turndown";
7
+ import { gfm } from "turndown-plugin-gfm";
8
+ import puppeteer from "puppeteer";
9
+ function llmSpiderPlugin(userOptions = {}) {
10
+ let resolvedConfig;
11
/**
 * Recursively layer `source` on top of a shallow copy of `target`.
 *
 * Only plain objects (prototype `Object.prototype` or `null`) are merged key by key.
 * Arrays, RegExps, functions, class instances and primitives from `source` replace the
 * target value wholesale, so user-supplied lists and patterns are never blended with
 * the defaults. Neither argument is mutated.
 *
 * @param {object} target Base values (typically the defaults).
 * @param {object | null | undefined} source Overrides; a non-object is treated as "no overrides".
 * @returns {object} A new object containing the merged values.
 */
function deepMerge(target, source) {
  const isPlainObject = (value) => {
    if (value === null || typeof value !== "object") return false;
    const proto = Object.getPrototypeOf(value);
    return proto === Object.prototype || proto === null;
  };

  // A non-object target (array, primitive, function) cannot be merged into; start clean
  // instead of spreading its indices/characters into the result.
  const result = isPlainObject(target) ? { ...target } : {};
  if (!isPlainObject(source)) return result;

  for (const key of Object.keys(source)) {
    // Assigning these keys would re-parent `result` instead of setting an option.
    if (key === "__proto__" || key === "constructor" || key === "prototype") continue;

    const value = source[key];
    result[key] = isPlainObject(value) ? deepMerge(result[key], value) : value;
  }
  return result;
}
22
// Built-in option values. Whatever the caller passes to the plugin is merged over these.
const defaults = {
  enabled: true,

  // Explicit list of routes to snapshot (the recommended way to use the plugin).
  /** @type {RouteDef[] | undefined} */
  routes: undefined,

  // Link-following crawl; disabled unless the caller turns it on.
  crawl: {
    enabled: false,
    seeds: ["/"],
    maxDepth: 2,
    maxPages: 50,
    concurrency: 3,
    stripQuery: true,
  },

  // Routes matching any of these entries are skipped.
  exclude: ["/login", "/admin", "/account"],

  render: {
    // "networkidle2" is more forgiving than "networkidle0" for SPAs.
    waitUntil: "networkidle2",
    timeoutMs: 30000,
    // Optional selector to wait for after navigation, e.g. "main" or "#app main".
    waitForSelector: null,
    postLoadDelayMs: 0,
    // Requests whose URL matches one of these are aborted.
    blockRequests: [
      /google-analytics\.com/i,
      /googletagmanager\.com/i,
      /segment\.com/i,
      /hotjar\.com/i,
    ],
    launchOptions: {
      // CI containers may additionally need:
      //   args: ["--no-sandbox", "--disable-setuid-sandbox"]
      headless: "new",
    },
    /**
     * Hook invoked with the page before navigation; no-op by default.
     * @param {import('puppeteer').Page} _page
     * @param {{ route: string }} _ctx
     */
    beforeGoto: async (_page, _ctx) => {},
    /**
     * Hook invoked with the page right before its HTML is read; no-op by default.
     * @param {import('puppeteer').Page} _page
     * @param {{ route: string }} _ctx
     */
    beforeExtract: async (_page, _ctx) => {},
  },

  extract: {
    // Candidate selectors for the main content, tried in order.
    mainSelector: ["main", "#main-content", "[data-main]"],
    // Elements stripped from the page before conversion.
    removeSelectors: [
      "script",
      "style",
      "noscript",
      "nav",
      "header",
      "footer",
      "svg",
      "iframe",
      "[role='alert']",
      ".cookie",
      ".cookie-banner",
      ".modal",
    ],
  },

  markdown: {
    addFrontmatter: true,
    // Passed straight to the TurndownService constructor.
    turndown: {
      headingStyle: "atx",
      codeBlockStyle: "fenced",
      emDelimiter: "_",
    },
  },

  output: {
    // "sibling": /pricing -> pricing.md ; /docs/ -> docs/index.html.md ; / -> index.html.md
    mode: "sibling",
    // Only consulted when mode === "subdir".
    subdir: "ai",
    llmsTxtFileName: "llms.txt",
    // null => closeBundle chooses a fallback title.
    llmsTitle: null,
    llmsSummary: "LLM-friendly index of important pages and their Markdown equivalents.",
    sort: true,
  },

  // One of "silent" | "info" | "debug".
  logLevel: "info",
};
109
// Effective settings: the caller's overrides layered over the built-in defaults.
const options = deepMerge(defaults, userOptions);

// Console logger gated by options.logLevel ("silent" | "info" | "debug").
const log = {
  /** Printed at the "info" and "debug" levels. */
  info(...args) {
    const enabled = options.logLevel === "info" || options.logLevel === "debug";
    return enabled ? console.log(...args) : undefined;
  },
  /** Printed only at the "debug" level. */
  debug(...args) {
    if (options.logLevel !== "debug") return undefined;
    return console.log(...args);
  },
  /** Printed at every level except "silent". */
  warn(...args) {
    if (options.logLevel === "silent") return undefined;
    return console.warn(...args);
  },
};
115
/**
 * Tell whether a route is covered by `options.exclude`.
 *
 * RegExp entries are tested against the route; every other entry is treated as a
 * substring that may occur anywhere in the route.
 *
 * @param {string} route Route to check.
 * @returns {boolean} `true` when at least one exclude entry matches.
 */
function isExcluded(route) {
  const patterns = options.exclude || [];
  const matchesRoute = (pattern) =>
    pattern instanceof RegExp ? pattern.test(route) : route.includes(pattern);
  return patterns.some(matchesRoute);
}
121
/**
 * Turn an href or configured path into a site-internal route that starts with "/".
 *
 * Returns `null` for anything that is not an in-site page link:
 *  - non-string or blank input,
 *  - any URL with a scheme (http:, https:, mailto:, tel:, javascript:, data:, ...),
 *    matched case-insensitively after trimming,
 *  - protocol-relative URLs ("//host/path"), which point at another origin,
 *  - links that are only a fragment, or only a query when the query is stripped.
 *
 * The fragment is always dropped. "." / ".." segments are resolved (never above "/"), so
 * a route can not escape the output directory when it is later turned into a file path.
 * Repeated slashes in the path are collapsed; a kept query string is left untouched.
 *
 * Relative inputs ("about", "./about", "../about") are resolved against the site root,
 * because the page they were found on is not known here.
 *
 * @param {string} input Raw href or route.
 * @param {{ stripQuery?: boolean }} [opts] `stripQuery` (default `true`) drops "?...".
 * @returns {string | null} Normalized route, or `null` when the input is not a page link.
 */
function normalizeRoute(input, { stripQuery = true } = {}) {
  if (typeof input !== "string") return null;

  let s = input.trim();
  if (!s) return null;

  // Anything with a URI scheme is either external or not a page at all.
  if (/^[a-z][a-z0-9+.-]*:/i.test(s)) return null;
  // Protocol-relative ("//cdn.example.com/x"); browsers also accept backslashes here.
  if (/^[\\/]{2}/.test(s)) return null;

  const hashIdx = s.indexOf("#");
  if (hashIdx >= 0) s = s.slice(0, hashIdx);

  // Split the query off so path clean-up below can not touch it.
  let query = "";
  const qIdx = s.indexOf("?");
  if (qIdx >= 0) {
    if (!stripQuery) query = s.slice(qIdx);
    s = s.slice(0, qIdx);
  }
  if (!s && !query) return null;

  // A path ending in "/", "/." or "/.." names a directory: keep its trailing slash.
  const isDirectory = /(?:^|\/)\.{0,2}$/.test(s);

  const segments = [];
  for (const segment of s.split("/")) {
    if (segment === "" || segment === ".") continue; // collapses "//" and "./"
    if (segment === "..") {
      segments.pop(); // popping an empty list keeps us at the root
      continue;
    }
    segments.push(segment);
  }

  let route = "/" + segments.join("/");
  if (isDirectory && segments.length) route += "/";
  return route + query;
}
142
/**
 * Map a base-relative route (leading "/") to the relative path of its Markdown file.
 *
 *   "/"        -> "index.html.md"
 *   "/docs/"   -> "docs/index.html.md"
 *   "/pricing" -> "pricing.md"
 *
 * @param {string} route Route starting with "/".
 * @returns {string} Relative Markdown path without a leading slash.
 */
function routeToMdWebPath(route) {
  if (route === "/") return "index.html.md";

  const isDirectory = route.endsWith("/");
  const withoutLeadingSlash = route.slice(1);
  const suffix = isDirectory ? "index.html.md" : ".md";
  return withoutLeadingSlash + suffix;
}
147
/**
 * Build the on-disk location of a route's Markdown file.
 *
 * In "subdir" output mode the file goes under `options.output.subdir` inside `distDir`;
 * in every other mode it is placed directly inside `distDir`.
 *
 * @param {string} distDir Build output directory.
 * @param {string} route Base-relative route starting with "/".
 * @returns {string} File-system path joined with the platform separator.
 */
function routeToMdFsPath(distDir, route) {
  const relativeMd = routeToMdWebPath(route);
  const parts =
    options.output.mode === "subdir"
      ? [distDir, options.output.subdir, relativeMd]
      : [distDir, relativeMd];
  return path.join(...parts);
}
154
/**
 * Convert a relative Markdown path into an llms.txt link target by turning every
 * backslash into a forward slash. The link stays relative (no leading slash is added).
 *
 * @param {string} relMdPath Relative path of the Markdown file.
 * @returns {string} The same path using "/" separators only.
 */
function makeLlmsLink(relMdPath) {
  const parts = relMdPath.split("\\");
  return parts.join("/");
}
157
/**
 * Close a Node HTTP server and wait until it has shut down.
 *
 * Closing a server that is not running (never started, or already closed) counts as
 * success. This keeps the call safe inside `finally` blocks, where an
 * ERR_SERVER_NOT_RUNNING rejection would otherwise replace the error that actually
 * caused the shutdown. Any other close error is propagated.
 *
 * @param {import('http').Server} server Server to close.
 * @returns {Promise<void>} Resolves once the server is closed.
 */
async function safeCloseHttpServer(server) {
  await new Promise((resolve, reject) => {
    server.close((err) => {
      if (err && err.code !== "ERR_SERVER_NOT_RUNNING") reject(err);
      else resolve();
    });
  });
}
162
+ return {
163
+ name: "vite-plugin-llm-spider",
164
+ apply: "build",
165
+ configResolved(rc) {
166
+ resolvedConfig = rc;
167
+ },
168
+ async closeBundle() {
169
+ var _a, _b, _c, _d, _e, _f;
170
+ if (!options.enabled) return;
171
+ if (!resolvedConfig)
172
+ throw new Error("LLM Spider: missing resolved Vite config");
173
+ const distDir = resolvedConfig.build.outDir || "dist";
174
+ const basePath = (resolvedConfig.base || "/").replace(/\\/g, "/");
175
+ let routeDefs = [];
176
+ if (Array.isArray(options.routes) && options.routes.length) {
177
+ routeDefs = options.routes.map((r) => ({
178
+ path: normalizeRoute(r.path, { stripQuery: true }) || "/",
179
+ title: r.title,
180
+ section: r.section || "Pages",
181
+ optional: !!r.optional,
182
+ notes: r.notes
183
+ }));
184
+ } else if ((_a = options.crawl) == null ? void 0 : _a.enabled) {
185
+ routeDefs = [];
186
+ } else {
187
+ routeDefs = [{ path: "/", section: "Pages" }];
188
+ }
189
+ log.info("\nLLM Spider: generating markdown + llms.txt");
190
+ log.debug("distDir:", distDir, "base:", basePath);
191
+ const previewServer = await preview({
192
+ root: resolvedConfig.root,
193
+ base: resolvedConfig.base,
194
+ build: { outDir: distDir },
195
+ preview: { port: 0, open: false, host: "127.0.0.1" },
196
+ configFile: false,
197
+ plugins: [],
198
+ // avoid loading user plugins again
199
+ logLevel: "silent"
200
+ });
201
+ await new Promise((resolve, reject) => {
202
+ const server = previewServer.httpServer;
203
+ if (server.listening) {
204
+ resolve();
205
+ } else {
206
+ server.once("listening", resolve);
207
+ server.once("error", reject);
208
+ setTimeout(() => reject(new Error("Preview server failed to start")), 5e3);
209
+ }
210
+ });
211
+ const addr = previewServer.httpServer.address();
212
+ if (!addr || typeof addr === "string") {
213
+ await safeCloseHttpServer(previewServer.httpServer);
214
+ throw new Error("LLM Spider: could not determine preview server port");
215
+ }
216
+ const normalizedBase = basePath.endsWith("/") ? basePath : basePath + "/";
217
+ const baseUrl = `http://127.0.0.1:${addr.port}${normalizedBase}`;
218
+ log.debug("Preview server at:", baseUrl);
219
+ const browser = await puppeteer.launch(options.render.launchOptions);
220
+ const turndown = new TurndownService(options.markdown.turndown);
221
+ turndown.use(gfm);
222
+ const visited = /* @__PURE__ */ new Set();
223
+ const captured = [];
224
+ const queue = [];
225
+ if ((_b = options.crawl) == null ? void 0 : _b.enabled) {
226
+ for (const seed of options.crawl.seeds || ["/"]) {
227
+ const nr = normalizeRoute(seed, {
228
+ stripQuery: options.crawl.stripQuery
229
+ });
230
+ if (nr) queue.push({ route: nr, depth: 0 });
231
+ }
232
+ } else {
233
+ for (const rd of routeDefs) queue.push({ route: rd.path, depth: 0 });
234
+ }
235
+ const maxDepth = ((_c = options.crawl) == null ? void 0 : _c.enabled) ? options.crawl.maxDepth : 0;
236
+ const maxPages = ((_d = options.crawl) == null ? void 0 : _d.enabled) ? options.crawl.maxPages : queue.length;
237
+ const concurrency = ((_e = options.crawl) == null ? void 0 : _e.enabled) ? options.crawl.concurrency : 3;
238
+ async function captureOne(route) {
239
+ var _a2, _b2, _c2;
240
+ if (visited.has(route)) return;
241
+ if (isExcluded(route)) return;
242
+ if (captured.length >= maxPages) return;
243
+ visited.add(route);
244
+ const page = await browser.newPage();
245
+ if ((_a2 = options.render.blockRequests) == null ? void 0 : _a2.length) {
246
+ await page.setRequestInterception(true);
247
+ page.on("request", (req) => {
248
+ const url = req.url();
249
+ const blocked = options.render.blockRequests.some(
250
+ (p) => p instanceof RegExp ? p.test(url) : url.includes(p)
251
+ );
252
+ if (blocked) req.abort();
253
+ else req.continue();
254
+ });
255
+ }
256
+ try {
257
+ const pageUrl = route === "/" ? baseUrl : baseUrl + route.replace(/^\//, "");
258
+ await options.render.beforeGoto(page, { route });
259
+ await page.goto(pageUrl, {
260
+ waitUntil: options.render.waitUntil,
261
+ timeout: options.render.timeoutMs
262
+ });
263
+ if (options.render.waitForSelector) {
264
+ await page.waitForSelector(options.render.waitForSelector, {
265
+ timeout: options.render.timeoutMs
266
+ });
267
+ }
268
+ if (options.render.postLoadDelayMs > 0) {
269
+ await new Promise(
270
+ (r) => setTimeout(r, options.render.postLoadDelayMs)
271
+ );
272
+ }
273
+ await options.render.beforeExtract(page, { route });
274
+ const html = await page.content();
275
+ const $ = cheerio.load(html);
276
+ let harvestedHrefs = [];
277
+ if ((_b2 = options.crawl) == null ? void 0 : _b2.enabled) {
278
+ harvestedHrefs = $("a[href]").map((_, a) => $(a).attr("href")).get();
279
+ log.debug(` Found ${harvestedHrefs.length} links on ${route}:`, harvestedHrefs.slice(0, 15));
280
+ }
281
+ for (const sel of options.extract.removeSelectors || [])
282
+ $(sel).remove();
283
+ const mainSelectors = Array.isArray(options.extract.mainSelector) ? options.extract.mainSelector : [options.extract.mainSelector];
284
+ let mainHtml = null;
285
+ for (const sel of mainSelectors) {
286
+ if (!sel) continue;
287
+ const node = $(sel).first();
288
+ if (node && node.length) {
289
+ mainHtml = node.html();
290
+ break;
291
+ }
292
+ }
293
+ if (!mainHtml) {
294
+ const main = $("main").first();
295
+ mainHtml = main.length ? main.html() : $("body").html();
296
+ }
297
+ const title = ($("title").text() || "").trim() || route;
298
+ const markdownBody = turndown.turndown(mainHtml || "");
299
+ const mdRelPath = options.output.mode === "subdir" ? path.posix.join(options.output.subdir, routeToMdWebPath(route)) : routeToMdWebPath(route);
300
+ const fsPath = routeToMdFsPath(distDir, route);
301
+ await fs.mkdir(path.dirname(fsPath), { recursive: true });
302
+ const frontmatter = options.markdown.addFrontmatter ? `---
303
+ source: ${route}
304
+ title: ${title}
305
+ generated_at: ${(/* @__PURE__ */ new Date()).toISOString()}
306
+ ---
307
+
308
+ ` : "";
309
+ await fs.writeFile(fsPath, frontmatter + markdownBody, "utf8");
310
+ const meta = routeDefs.find((r) => r.path === route);
311
+ captured.push({
312
+ route,
313
+ title: (meta == null ? void 0 : meta.title) || title,
314
+ section: (meta == null ? void 0 : meta.section) || "Pages",
315
+ optional: !!(meta == null ? void 0 : meta.optional),
316
+ notes: meta == null ? void 0 : meta.notes,
317
+ mdRelPath
318
+ });
319
+ log.info(` \u2705 ${route} -> ${mdRelPath}`);
320
+ if ((_c2 = options.crawl) == null ? void 0 : _c2.enabled) {
321
+ for (const href of harvestedHrefs) {
322
+ const n = normalizeRoute(href, {
323
+ stripQuery: options.crawl.stripQuery
324
+ });
325
+ if (!n) continue;
326
+ let baseRelative = n;
327
+ if (normalizedBase !== "/" && baseRelative.startsWith(normalizedBase)) {
328
+ baseRelative = "/" + baseRelative.slice(normalizedBase.length);
329
+ baseRelative = baseRelative === "//" ? "/" : baseRelative.replace(/\/{2,}/g, "/");
330
+ }
331
+ if (!visited.has(baseRelative) && !isExcluded(baseRelative)) {
332
+ queue.push({ route: baseRelative, depth: -1 });
333
+ }
334
+ }
335
+ }
336
+ } catch (err) {
337
+ log.warn(` \u26A0\uFE0F failed ${route}: ${(err == null ? void 0 : err.message) || err}`);
338
+ } finally {
339
+ await page.close();
340
+ }
341
+ }
342
+ try {
343
+ while (queue.length && captured.length < maxPages) {
344
+ const batch = queue.splice(0, concurrency).map((item) => {
345
+ const depth = item.depth >= 0 ? item.depth : 1;
346
+ return { route: item.route, depth };
347
+ });
348
+ await Promise.all(
349
+ batch.map(async ({ route, depth }) => {
350
+ var _a2, _b2;
351
+ if (((_a2 = options.crawl) == null ? void 0 : _a2.enabled) && depth > maxDepth) return;
352
+ await captureOne(route);
353
+ if ((_b2 = options.crawl) == null ? void 0 : _b2.enabled) {
354
+ for (let i = 0; i < queue.length; i++) {
355
+ if (queue[i].depth === -1) queue[i].depth = depth + 1;
356
+ }
357
+ }
358
+ })
359
+ );
360
+ }
361
+ const llmsTitle = options.output.llmsTitle || ((_f = resolvedConfig == null ? void 0 : resolvedConfig.env) == null ? void 0 : _f.mode) || "Site";
362
+ const items = options.output.sort ? [...captured].sort((a, b) => a.route.localeCompare(b.route)) : captured;
363
+ const bySection = /* @__PURE__ */ new Map();
364
+ const optionalItems = [];
365
+ for (const item of items) {
366
+ if (item.optional) optionalItems.push(item);
367
+ else {
368
+ const s = item.section || "Pages";
369
+ bySection.set(s, [...bySection.get(s) || [], item]);
370
+ }
371
+ }
372
+ let llms = `# ${llmsTitle}
373
+
374
+ > ${options.output.llmsSummary}
375
+
376
+ `;
377
+ for (const [section, sectionItems] of bySection.entries()) {
378
+ llms += `## ${section}
379
+
380
+ `;
381
+ for (const it of sectionItems) {
382
+ const link = makeLlmsLink(it.mdRelPath);
383
+ const label = it.title || it.route;
384
+ const notes = it.notes ? `: ${it.notes}` : "";
385
+ llms += `- [${label}](${link})${notes}
386
+ `;
387
+ }
388
+ llms += `
389
+ `;
390
+ }
391
+ if (optionalItems.length) {
392
+ llms += `## Optional
393
+
394
+ `;
395
+ for (const it of optionalItems) {
396
+ const link = makeLlmsLink(it.mdRelPath);
397
+ const label = it.title || it.route;
398
+ const notes = it.notes ? `: ${it.notes}` : "";
399
+ llms += `- [${label}](${link})${notes}
400
+ `;
401
+ }
402
+ llms += `
403
+ `;
404
+ }
405
+ const llmsPath = path.join(distDir, options.output.llmsTxtFileName);
406
+ await fs.writeFile(llmsPath, llms, "utf8");
407
+ log.info(
408
+ `
409
+ LLM Spider: wrote ${captured.length} markdown pages + ${options.output.llmsTxtFileName}
410
+ `
411
+ );
412
+ } finally {
413
+ await browser.close();
414
+ await safeCloseHttpServer(previewServer.httpServer);
415
+ }
416
+ }
417
+ };
418
+ }
419
+ export {
420
+ llmSpiderPlugin as default,
421
+ llmSpiderPlugin
422
+ };
423
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.js"],"sourcesContent":["import { preview } from \"vite\";\nimport fs from \"node:fs/promises\";\nimport path from \"node:path\";\nimport * as cheerio from \"cheerio\";\nimport TurndownService from \"turndown\";\nimport { gfm } from \"turndown-plugin-gfm\";\nimport puppeteer from \"puppeteer\";\n\n/**\n * @typedef {{ path: string, title?: string, section?: string, optional?: boolean, notes?: string }} RouteDef\n */\n\n/**\n * Vite Plugin: LLM Spider\n * - Generates Markdown snapshots + dist/llms.txt\n * - Spec-aligned default output: \".md appended\" / \"index.html.md\" for directory URLs.\n */\nexport default function llmSpiderPlugin(userOptions = {}) {\n /** @type {import('vite').ResolvedConfig | undefined} */\n let resolvedConfig;\n\n // Deep merge helper\n function deepMerge(target, source) {\n const result = { ...target };\n for (const key of Object.keys(source)) {\n if (source[key] && typeof source[key] === 'object' && !Array.isArray(source[key]) && !(source[key] instanceof RegExp)) {\n result[key] = deepMerge(target[key] || {}, source[key]);\n } else {\n result[key] = source[key];\n }\n }\n return result;\n }\n\n const defaults = {\n enabled: true,\n\n // Recommended: explicit list\n routes: /** @type {RouteDef[] | undefined} */ (undefined),\n\n // Optional crawl mode (off by default)\n crawl: {\n enabled: false,\n seeds: [\"/\"],\n maxDepth: 2,\n maxPages: 50,\n concurrency: 3,\n stripQuery: true,\n },\n\n exclude: [\"/login\", \"/admin\", \"/account\"],\n\n render: {\n waitUntil: \"networkidle2\", // more forgiving than networkidle0 for SPAs\n timeoutMs: 30_000,\n waitForSelector: null, // e.g. 
\"main\" or \"#app main\"\n postLoadDelayMs: 0,\n blockRequests: [\n /google-analytics\\.com/i,\n /googletagmanager\\.com/i,\n /segment\\.com/i,\n /hotjar\\.com/i,\n ],\n launchOptions: {\n headless: \"new\",\n // For CI containers you may need:\n // args: [\"--no-sandbox\", \"--disable-setuid-sandbox\"],\n },\n /**\n * @param {import('puppeteer').Page} _page\n * @param {{ route: string }} _ctx\n */\n beforeGoto: async (_page, _ctx) => {},\n /**\n * @param {import('puppeteer').Page} _page\n * @param {{ route: string }} _ctx\n */\n beforeExtract: async (_page, _ctx) => {},\n },\n\n extract: {\n mainSelector: [\"main\", \"#main-content\", \"[data-main]\"],\n removeSelectors: [\n \"script\",\n \"style\",\n \"noscript\",\n \"nav\",\n \"header\",\n \"footer\",\n \"svg\",\n \"iframe\",\n \"[role='alert']\",\n \".cookie\",\n \".cookie-banner\",\n \".modal\",\n ],\n },\n\n markdown: {\n addFrontmatter: true,\n turndown: {\n headingStyle: \"atx\",\n codeBlockStyle: \"fenced\",\n emDelimiter: \"_\",\n },\n },\n\n output: {\n // \"sibling\" => /pricing -> pricing.md ; /docs/ -> docs/index.html.md ; / -> index.html.md\n mode: \"sibling\",\n subdir: \"ai\", // used only when mode === \"subdir\"\n llmsTxtFileName: \"llms.txt\",\n llmsTitle: null, // defaults to package name or project dir\n llmsSummary:\n \"LLM-friendly index of important pages and their Markdown equivalents.\",\n sort: true,\n },\n\n logLevel: \"info\", // \"silent\" | \"info\" | \"debug\"\n };\n\n const options = deepMerge(defaults, userOptions);\n\n const log = {\n info: (...args) =>\n options.logLevel === \"info\" || options.logLevel === \"debug\"\n ? console.log(...args)\n : undefined,\n debug: (...args) =>\n options.logLevel === \"debug\" ? console.log(...args) : undefined,\n warn: (...args) =>\n options.logLevel !== \"silent\" ? 
console.warn(...args) : undefined,\n };\n\n function isExcluded(route) {\n return (options.exclude || []).some((p) => {\n if (p instanceof RegExp) return p.test(route);\n return route.includes(p);\n });\n }\n\n function normalizeRoute(input, { stripQuery = true } = {}) {\n if (!input) return null;\n\n // Ignore non-page links\n if (\n input.startsWith(\"mailto:\") ||\n input.startsWith(\"tel:\") ||\n input.startsWith(\"javascript:\")\n )\n return null;\n\n // Convert relative -> absolute-ish (we only keep paths)\n // If input is like \"./about\" or \"about\", normalize to \"/about\"\n let s = input.trim();\n\n // Remove protocol absolute links\n if (s.startsWith(\"http://\") || s.startsWith(\"https://\")) return null;\n\n // Drop hash/query\n const hashIdx = s.indexOf(\"#\");\n if (hashIdx >= 0) s = s.slice(0, hashIdx);\n\n if (stripQuery) {\n const qIdx = s.indexOf(\"?\");\n if (qIdx >= 0) s = s.slice(0, qIdx);\n }\n\n // Ignore empty after stripping\n if (!s) return null;\n\n // Normalize relative paths\n if (!s.startsWith(\"/\")) {\n if (s.startsWith(\"./\"))\n s = s.slice(1); // \"./x\" -> \"/x\"\n else s = \"/\" + s;\n }\n\n // Collapse multiple slashes\n s = s.replace(/\\/{2,}/g, \"/\");\n\n return s;\n }\n\n function routeToMdWebPath(route) {\n // route is base-relative and starts with \"/\"\n if (route === \"/\") return \"index.html.md\";\n if (route.endsWith(\"/\")) return route.slice(1) + \"index.html.md\"; // \"docs/\" -> \"docs/index.html.md\"\n return route.slice(1) + \".md\"; // \"pricing\" -> \"pricing.md\"\n }\n\n function routeToMdFsPath(distDir, route) {\n const rel = routeToMdWebPath(route); // already relative\n if (options.output.mode === \"subdir\") {\n return path.join(distDir, options.output.subdir, rel);\n }\n return path.join(distDir, rel);\n }\n\n function makeLlmsLink(relMdPath) {\n // Use relative links (no leading slash) so it works in subpath deployments.\n // If subdir mode: links should include \"ai/...\"\n return 
relMdPath.replace(/\\\\/g, \"/\");\n }\n\n async function safeCloseHttpServer(server) {\n await new Promise((resolve, reject) => {\n server.close((err) => (err ? reject(err) : resolve()));\n });\n }\n\n return {\n name: \"vite-plugin-llm-spider\",\n apply: \"build\",\n\n configResolved(rc) {\n resolvedConfig = rc;\n },\n\n async closeBundle() {\n if (!options.enabled) return;\n if (!resolvedConfig)\n throw new Error(\"LLM Spider: missing resolved Vite config\");\n\n const distDir = resolvedConfig.build.outDir || \"dist\";\n const basePath = (resolvedConfig.base || \"/\").replace(/\\\\/g, \"/\");\n\n // ---- Resolve route list ----\n /** @type {RouteDef[]} */\n let routeDefs = [];\n\n if (Array.isArray(options.routes) && options.routes.length) {\n routeDefs = options.routes.map((r) => ({\n path: normalizeRoute(r.path, { stripQuery: true }) || \"/\",\n title: r.title,\n section: r.section || \"Pages\",\n optional: !!r.optional,\n notes: r.notes,\n }));\n } else if (options.crawl?.enabled) {\n // Crawl mode: route defs will be created as discovered.\n routeDefs = [];\n } else {\n // Default minimal route\n routeDefs = [{ path: \"/\", section: \"Pages\" }];\n }\n\n log.info(\"\\nLLM Spider: generating markdown + llms.txt\");\n log.debug(\"distDir:\", distDir, \"base:\", basePath);\n\n // ---- Start preview server for built output ----\n // Vite preview API returns a PreviewServer with httpServer + resolvedUrls.\n const previewServer = await preview({\n root: resolvedConfig.root,\n base: resolvedConfig.base,\n build: { outDir: distDir },\n preview: { port: 0, open: false, host: '127.0.0.1' },\n configFile: false,\n plugins: [], // avoid loading user plugins again\n logLevel: \"silent\",\n });\n\n // Wait for server to be fully listening\n await new Promise((resolve, reject) => {\n const server = previewServer.httpServer;\n if (server.listening) {\n resolve();\n } else {\n server.once('listening', resolve);\n server.once('error', reject);\n // Timeout after 5s\n 
setTimeout(() => reject(new Error('Preview server failed to start')), 5000);\n }\n });\n\n const addr = previewServer.httpServer.address();\n if (!addr || typeof addr === \"string\") {\n await safeCloseHttpServer(previewServer.httpServer);\n throw new Error(\"LLM Spider: could not determine preview server port\");\n }\n\n // Build a base URL that respects Vite's base path\n // Example: http://127.0.0.1:4173/app/ (if base=\"/app/\")\n const normalizedBase = basePath.endsWith(\"/\") ? basePath : basePath + \"/\";\n const baseUrl = `http://127.0.0.1:${addr.port}${normalizedBase}`;\n \n log.debug(\"Preview server at:\", baseUrl);\n\n const browser = await puppeteer.launch(options.render.launchOptions);\n const turndown = new TurndownService(options.markdown.turndown);\n turndown.use(gfm);\n\n /** @type {Set<string>} */\n const visited = new Set();\n\n /** @type {{ route: string, title?: string, section: string, optional: boolean, notes?: string, mdRelPath: string }[]} */\n const captured = [];\n\n // Crawl queue stores base-relative routes (no base prefix)\n /** @type {{ route: string, depth: number }[]} */\n const queue = [];\n\n // Seed queue\n if (options.crawl?.enabled) {\n for (const seed of options.crawl.seeds || [\"/\"]) {\n const nr = normalizeRoute(seed, {\n stripQuery: options.crawl.stripQuery,\n });\n if (nr) queue.push({ route: nr, depth: 0 });\n }\n } else {\n for (const rd of routeDefs) queue.push({ route: rd.path, depth: 0 });\n }\n\n const maxDepth = options.crawl?.enabled ? options.crawl.maxDepth : 0;\n const maxPages = options.crawl?.enabled\n ? options.crawl.maxPages\n : queue.length;\n const concurrency = options.crawl?.enabled\n ? 
options.crawl.concurrency\n : 3;\n\n async function captureOne(route) {\n if (visited.has(route)) return;\n if (isExcluded(route)) return;\n if (captured.length >= maxPages) return;\n\n visited.add(route);\n\n const page = await browser.newPage();\n\n // Request blocking (best effort)\n if (options.render.blockRequests?.length) {\n await page.setRequestInterception(true);\n page.on(\"request\", (req) => {\n const url = req.url();\n const blocked = options.render.blockRequests.some((p) =>\n p instanceof RegExp ? p.test(url) : url.includes(p),\n );\n if (blocked) req.abort();\n else req.continue();\n });\n }\n\n try {\n const pageUrl =\n route === \"/\" ? baseUrl : baseUrl + route.replace(/^\\//, \"\");\n await options.render.beforeGoto(page, { route });\n\n await page.goto(pageUrl, {\n waitUntil: options.render.waitUntil,\n timeout: options.render.timeoutMs,\n });\n\n if (options.render.waitForSelector) {\n await page.waitForSelector(options.render.waitForSelector, {\n timeout: options.render.timeoutMs,\n });\n }\n\n if (options.render.postLoadDelayMs > 0) {\n await new Promise((r) =>\n setTimeout(r, options.render.postLoadDelayMs),\n );\n }\n\n await options.render.beforeExtract(page, { route });\n\n const html = await page.content();\n const $ = cheerio.load(html);\n\n // Harvest links BEFORE removing nav elements (for crawl mode)\n let harvestedHrefs = [];\n if (options.crawl?.enabled) {\n harvestedHrefs = $(\"a[href]\")\n .map((_, a) => $(a).attr(\"href\"))\n .get();\n log.debug(` Found ${harvestedHrefs.length} links on ${route}:`, harvestedHrefs.slice(0, 15));\n }\n\n // Remove noisy elements (CSS selectors)\n for (const sel of options.extract.removeSelectors || [])\n $(sel).remove();\n\n // Pick main content\n const mainSelectors = Array.isArray(options.extract.mainSelector)\n ? 
options.extract.mainSelector\n : [options.extract.mainSelector];\n\n let mainHtml = null;\n for (const sel of mainSelectors) {\n if (!sel) continue;\n const node = $(sel).first();\n if (node && node.length) {\n mainHtml = node.html();\n break;\n }\n }\n if (!mainHtml) {\n const main = $(\"main\").first();\n mainHtml = main.length ? main.html() : $(\"body\").html();\n }\n\n const title = ($(\"title\").text() || \"\").trim() || route;\n\n // Convert to Markdown\n const markdownBody = turndown.turndown(mainHtml || \"\");\n\n // Write file\n const mdRelPath =\n options.output.mode === \"subdir\"\n ? path.posix.join(options.output.subdir, routeToMdWebPath(route))\n : routeToMdWebPath(route);\n\n const fsPath = routeToMdFsPath(distDir, route);\n await fs.mkdir(path.dirname(fsPath), { recursive: true });\n\n const frontmatter = options.markdown.addFrontmatter\n ? `---\\nsource: ${route}\\ntitle: ${title}\\ngenerated_at: ${new Date().toISOString()}\\n---\\n\\n`\n : \"\";\n\n await fs.writeFile(fsPath, frontmatter + markdownBody, \"utf8\");\n\n // Map metadata\n const meta = routeDefs.find((r) => r.path === route);\n captured.push({\n route,\n title: meta?.title || title,\n section: meta?.section || \"Pages\",\n optional: !!meta?.optional,\n notes: meta?.notes,\n mdRelPath,\n });\n\n log.info(` ✅ ${route} -> ${mdRelPath}`);\n\n // Harvest links (crawl mode only) - using pre-harvested links from before cleanup\n if (options.crawl?.enabled) {\n for (const href of harvestedHrefs) {\n const n = normalizeRoute(href, {\n stripQuery: options.crawl.stripQuery,\n });\n if (!n) continue;\n\n // If site is deployed under a base like \"/app/\", router-links usually include \"/app/...\"\n // Strip base prefix when present so our internal route stays base-relative.\n let baseRelative = n;\n if (\n normalizedBase !== \"/\" &&\n baseRelative.startsWith(normalizedBase)\n ) {\n baseRelative = \"/\" + baseRelative.slice(normalizedBase.length);\n baseRelative =\n baseRelative === \"//\"\n ? 
\"/\"\n : baseRelative.replace(/\\/{2,}/g, \"/\");\n }\n\n if (!visited.has(baseRelative) && !isExcluded(baseRelative)) {\n // Depth tracking is handled by the outer loop (we store depth in queue entries)\n // so just push; caller will attach depth.\n queue.push({ route: baseRelative, depth: -1 }); // placeholder depth; will be overwritten\n }\n }\n }\n } catch (err) {\n log.warn(` ⚠️ failed ${route}: ${err?.message || err}`);\n } finally {\n await page.close();\n }\n }\n\n try {\n // BFS: process queue in batches\n while (queue.length && captured.length < maxPages) {\n // Fix up crawl depths if needed\n // If we're in crawl mode, queue items may have depth=-1 from harvested links.\n // We'll conservatively treat them as depth=1 unless they were explicitly set.\n const batch = queue.splice(0, concurrency).map((item) => {\n const depth = item.depth >= 0 ? item.depth : 1;\n return { route: item.route, depth };\n });\n\n await Promise.all(\n batch.map(async ({ route, depth }) => {\n if (options.crawl?.enabled && depth > maxDepth) return;\n await captureOne(route);\n\n // If crawl mode, increase depth for any newly harvested links\n if (options.crawl?.enabled) {\n // Patch any depth=-1 entries added during captureOne\n for (let i = 0; i < queue.length; i++) {\n if (queue[i].depth === -1) queue[i].depth = depth + 1;\n }\n }\n }),\n );\n }\n\n // ---- Generate llms.txt ----\n const llmsTitle =\n options.output.llmsTitle || resolvedConfig?.env?.mode || \"Site\";\n\n // Deterministic ordering\n const items = options.output.sort\n ? 
[...captured].sort((a, b) => a.route.localeCompare(b.route))\n : captured;\n\n // Group by section, with Optional special handling\n /** @type {Map<string, typeof items>} */\n const bySection = new Map();\n /** @type {typeof items} */\n const optionalItems = [];\n\n for (const item of items) {\n if (item.optional) optionalItems.push(item);\n else {\n const s = item.section || \"Pages\";\n bySection.set(s, [...(bySection.get(s) || []), item]);\n }\n }\n\n let llms = `# ${llmsTitle}\\n\\n> ${options.output.llmsSummary}\\n\\n`;\n\n for (const [section, sectionItems] of bySection.entries()) {\n llms += `## ${section}\\n\\n`;\n for (const it of sectionItems) {\n const link = makeLlmsLink(it.mdRelPath);\n const label = it.title || it.route;\n const notes = it.notes ? `: ${it.notes}` : \"\";\n llms += `- [${label}](${link})${notes}\\n`;\n }\n llms += `\\n`;\n }\n\n if (optionalItems.length) {\n llms += `## Optional\\n\\n`;\n for (const it of optionalItems) {\n const link = makeLlmsLink(it.mdRelPath);\n const label = it.title || it.route;\n const notes = it.notes ? 
`: ${it.notes}` : \"\";\n llms += `- [${label}](${link})${notes}\\n`;\n }\n llms += `\\n`;\n }\n\n const llmsPath = path.join(distDir, options.output.llmsTxtFileName);\n await fs.writeFile(llmsPath, llms, \"utf8\");\n\n log.info(\n `\\nLLM Spider: wrote ${captured.length} markdown pages + ${options.output.llmsTxtFileName}\\n`,\n );\n } finally {\n await browser.close();\n await safeCloseHttpServer(previewServer.httpServer);\n }\n },\n };\n}\n\n// Named export for CJS compatibility\nexport { llmSpiderPlugin };\n"],"mappings":";AAAA,SAAS,eAAe;AACxB,OAAO,QAAQ;AACf,OAAO,UAAU;AACjB,YAAY,aAAa;AACzB,OAAO,qBAAqB;AAC5B,SAAS,WAAW;AACpB,OAAO,eAAe;AAWP,SAAR,gBAAiC,cAAc,CAAC,GAAG;AAExD,MAAI;AAGJ,WAAS,UAAU,QAAQ,QAAQ;AACjC,UAAM,SAAS,EAAE,GAAG,OAAO;AAC3B,eAAW,OAAO,OAAO,KAAK,MAAM,GAAG;AACrC,UAAI,OAAO,GAAG,KAAK,OAAO,OAAO,GAAG,MAAM,YAAY,CAAC,MAAM,QAAQ,OAAO,GAAG,CAAC,KAAK,EAAE,OAAO,GAAG,aAAa,SAAS;AACrH,eAAO,GAAG,IAAI,UAAU,OAAO,GAAG,KAAK,CAAC,GAAG,OAAO,GAAG,CAAC;AAAA,MACxD,OAAO;AACL,eAAO,GAAG,IAAI,OAAO,GAAG;AAAA,MAC1B;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,QAAM,WAAW;AAAA,IACf,SAAS;AAAA;AAAA,IAGT;AAAA;AAAA,MAA+C;AAAA;AAAA;AAAA,IAG/C,OAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,CAAC,GAAG;AAAA,MACX,UAAU;AAAA,MACV,UAAU;AAAA,MACV,aAAa;AAAA,MACb,YAAY;AAAA,IACd;AAAA,IAEA,SAAS,CAAC,UAAU,UAAU,UAAU;AAAA,IAExC,QAAQ;AAAA,MACN,WAAW;AAAA;AAAA,MACX,WAAW;AAAA,MACX,iBAAiB;AAAA;AAAA,MACjB,iBAAiB;AAAA,MACjB,eAAe;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,MACA,eAAe;AAAA,QACb,UAAU;AAAA;AAAA;AAAA,MAGZ;AAAA;AAAA;AAAA;AAAA;AAAA,MAKA,YAAY,OAAO,OAAO,SAAS;AAAA,MAAC;AAAA;AAAA;AAAA;AAAA;AAAA,MAKpC,eAAe,OAAO,OAAO,SAAS;AAAA,MAAC;AAAA,IACzC;AAAA,IAEA,SAAS;AAAA,MACP,cAAc,CAAC,QAAQ,iBAAiB,aAAa;AAAA,MACrD,iBAAiB;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,IAEA,UAAU;AAAA,MACR,gBAAgB;AAAA,MAChB,UAAU;AAAA,QACR,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,aAAa;AAAA,MACf;AAAA,IACF;AAAA,IAEA,QAAQ;AAAA;AAAA,MAEN,MAAM;AAAA,MACN,QAAQ;AAAA;AAAA,
MACR,iBAAiB;AAAA,MACjB,WAAW;AAAA;AAAA,MACX,aACE;AAAA,MACF,MAAM;AAAA,IACR;AAAA,IAEA,UAAU;AAAA;AAAA,EACZ;AAEA,QAAM,UAAU,UAAU,UAAU,WAAW;AAE/C,QAAM,MAAM;AAAA,IACV,MAAM,IAAI,SACR,QAAQ,aAAa,UAAU,QAAQ,aAAa,UAChD,QAAQ,IAAI,GAAG,IAAI,IACnB;AAAA,IACN,OAAO,IAAI,SACT,QAAQ,aAAa,UAAU,QAAQ,IAAI,GAAG,IAAI,IAAI;AAAA,IACxD,MAAM,IAAI,SACR,QAAQ,aAAa,WAAW,QAAQ,KAAK,GAAG,IAAI,IAAI;AAAA,EAC5D;AAEA,WAAS,WAAW,OAAO;AACzB,YAAQ,QAAQ,WAAW,CAAC,GAAG,KAAK,CAAC,MAAM;AACzC,UAAI,aAAa,OAAQ,QAAO,EAAE,KAAK,KAAK;AAC5C,aAAO,MAAM,SAAS,CAAC;AAAA,IACzB,CAAC;AAAA,EACH;AAEA,WAAS,eAAe,OAAO,EAAE,aAAa,KAAK,IAAI,CAAC,GAAG;AACzD,QAAI,CAAC,MAAO,QAAO;AAGnB,QACE,MAAM,WAAW,SAAS,KAC1B,MAAM,WAAW,MAAM,KACvB,MAAM,WAAW,aAAa;AAE9B,aAAO;AAIT,QAAI,IAAI,MAAM,KAAK;AAGnB,QAAI,EAAE,WAAW,SAAS,KAAK,EAAE,WAAW,UAAU,EAAG,QAAO;AAGhE,UAAM,UAAU,EAAE,QAAQ,GAAG;AAC7B,QAAI,WAAW,EAAG,KAAI,EAAE,MAAM,GAAG,OAAO;AAExC,QAAI,YAAY;AACd,YAAM,OAAO,EAAE,QAAQ,GAAG;AAC1B,UAAI,QAAQ,EAAG,KAAI,EAAE,MAAM,GAAG,IAAI;AAAA,IACpC;AAGA,QAAI,CAAC,EAAG,QAAO;AAGf,QAAI,CAAC,EAAE,WAAW,GAAG,GAAG;AACtB,UAAI,EAAE,WAAW,IAAI;AACnB,YAAI,EAAE,MAAM,CAAC;AAAA,UACV,KAAI,MAAM;AAAA,IACjB;AAGA,QAAI,EAAE,QAAQ,WAAW,GAAG;AAE5B,WAAO;AAAA,EACT;AAEA,WAAS,iBAAiB,OAAO;AAE/B,QAAI,UAAU,IAAK,QAAO;AAC1B,QAAI,MAAM,SAAS,GAAG,EAAG,QAAO,MAAM,MAAM,CAAC,IAAI;AACjD,WAAO,MAAM,MAAM,CAAC,IAAI;AAAA,EAC1B;AAEA,WAAS,gBAAgB,SAAS,OAAO;AACvC,UAAM,MAAM,iBAAiB,KAAK;AAClC,QAAI,QAAQ,OAAO,SAAS,UAAU;AACpC,aAAO,KAAK,KAAK,SAAS,QAAQ,OAAO,QAAQ,GAAG;AAAA,IACtD;AACA,WAAO,KAAK,KAAK,SAAS,GAAG;AAAA,EAC/B;AAEA,WAAS,aAAa,WAAW;AAG/B,WAAO,UAAU,QAAQ,OAAO,GAAG;AAAA,EACrC;AAEA,iBAAe,oBAAoB,QAAQ;AACzC,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAO,MAAM,CAAC,QAAS,MAAM,OAAO,GAAG,IAAI,QAAQ,CAAE;AAAA,IACvD,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,OAAO;AAAA,IAEP,eAAe,IAAI;AACjB,uBAAiB;AAAA,IACnB;AAAA,IAEA,MAAM,cAAc;AA3NxB;AA4NM,UAAI,CAAC,QAAQ,QAAS;AACtB,UAAI,CAAC;AACH,cAAM,IAAI,MAAM,0CAA0C;AAE5D,YAAM,UAAU,eAAe,MAAM,UAAU;AAC/C,YAAM,YAAY,eAAe,QAAQ,KAAK,QAAQ,OAAO,GAAG;AAIhE,UAAI,YAAY,CAAC;AAEjB,UAAI,MAAM,QAAQ,QAAQ,MAAM,KAAK,QAAQ,OAAO,Q
AAQ;AAC1D,oBAAY,QAAQ,OAAO,IAAI,CAAC,OAAO;AAAA,UACrC,MAAM,eAAe,EAAE,MAAM,EAAE,YAAY,KAAK,CAAC,KAAK;AAAA,UACtD,OAAO,EAAE;AAAA,UACT,SAAS,EAAE,WAAW;AAAA,UACtB,UAAU,CAAC,CAAC,EAAE;AAAA,UACd,OAAO,EAAE;AAAA,QACX,EAAE;AAAA,MACJ,YAAW,aAAQ,UAAR,mBAAe,SAAS;AAEjC,oBAAY,CAAC;AAAA,MACf,OAAO;AAEL,oBAAY,CAAC,EAAE,MAAM,KAAK,SAAS,QAAQ,CAAC;AAAA,MAC9C;AAEA,UAAI,KAAK,8CAA8C;AACvD,UAAI,MAAM,YAAY,SAAS,SAAS,QAAQ;AAIhD,YAAM,gBAAgB,MAAM,QAAQ;AAAA,QAClC,MAAM,eAAe;AAAA,QACrB,MAAM,eAAe;AAAA,QACrB,OAAO,EAAE,QAAQ,QAAQ;AAAA,QACzB,SAAS,EAAE,MAAM,GAAG,MAAM,OAAO,MAAM,YAAY;AAAA,QACnD,YAAY;AAAA,QACZ,SAAS,CAAC;AAAA;AAAA,QACV,UAAU;AAAA,MACZ,CAAC;AAGD,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,cAAM,SAAS,cAAc;AAC7B,YAAI,OAAO,WAAW;AACpB,kBAAQ;AAAA,QACV,OAAO;AACL,iBAAO,KAAK,aAAa,OAAO;AAChC,iBAAO,KAAK,SAAS,MAAM;AAE3B,qBAAW,MAAM,OAAO,IAAI,MAAM,gCAAgC,CAAC,GAAG,GAAI;AAAA,QAC5E;AAAA,MACF,CAAC;AAED,YAAM,OAAO,cAAc,WAAW,QAAQ;AAC9C,UAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC,cAAM,oBAAoB,cAAc,UAAU;AAClD,cAAM,IAAI,MAAM,qDAAqD;AAAA,MACvE;AAIA,YAAM,iBAAiB,SAAS,SAAS,GAAG,IAAI,WAAW,WAAW;AACtE,YAAM,UAAU,oBAAoB,KAAK,IAAI,GAAG,cAAc;AAE9D,UAAI,MAAM,sBAAsB,OAAO;AAEvC,YAAM,UAAU,MAAM,UAAU,OAAO,QAAQ,OAAO,aAAa;AACnE,YAAM,WAAW,IAAI,gBAAgB,QAAQ,SAAS,QAAQ;AAC9D,eAAS,IAAI,GAAG;AAGhB,YAAM,UAAU,oBAAI,IAAI;AAGxB,YAAM,WAAW,CAAC;AAIlB,YAAM,QAAQ,CAAC;AAGf,WAAI,aAAQ,UAAR,mBAAe,SAAS;AAC1B,mBAAW,QAAQ,QAAQ,MAAM,SAAS,CAAC,GAAG,GAAG;AAC/C,gBAAM,KAAK,eAAe,MAAM;AAAA,YAC9B,YAAY,QAAQ,MAAM;AAAA,UAC5B,CAAC;AACD,cAAI,GAAI,OAAM,KAAK,EAAE,OAAO,IAAI,OAAO,EAAE,CAAC;AAAA,QAC5C;AAAA,MACF,OAAO;AACL,mBAAW,MAAM,UAAW,OAAM,KAAK,EAAE,OAAO,GAAG,MAAM,OAAO,EAAE,CAAC;AAAA,MACrE;AAEA,YAAM,aAAW,aAAQ,UAAR,mBAAe,WAAU,QAAQ,MAAM,WAAW;AACnE,YAAM,aAAW,aAAQ,UAAR,mBAAe,WAC5B,QAAQ,MAAM,WACd,MAAM;AACV,YAAM,gBAAc,aAAQ,UAAR,mBAAe,WAC/B,QAAQ,MAAM,cACd;AAEJ,qBAAe,WAAW,OAAO;AAlUvC,YAAAA,KAAAC,KAAAC;AAmUQ,YAAI,QAAQ,IAAI,KAAK,EAAG;AACxB,YAAI,WAAW,KAAK,EAAG;AACvB,YAAI,SAAS,UAAU,SAAU;AAEjC,gBAAQ,IAAI,KAAK;AAEjB,cAAM,OAAO,MAAM,QAAQ,QAAQ;AAGnC,aAAIF,MAAA,QAAQ,OAAO,kBAAf,gBAAAA,IAA8B,QAAQ;AACxC,gBAAM,KAAK,u
BAAuB,IAAI;AACtC,eAAK,GAAG,WAAW,CAAC,QAAQ;AAC1B,kBAAM,MAAM,IAAI,IAAI;AACpB,kBAAM,UAAU,QAAQ,OAAO,cAAc;AAAA,cAAK,CAAC,MACjD,aAAa,SAAS,EAAE,KAAK,GAAG,IAAI,IAAI,SAAS,CAAC;AAAA,YACpD;AACA,gBAAI,QAAS,KAAI,MAAM;AAAA,gBAClB,KAAI,SAAS;AAAA,UACpB,CAAC;AAAA,QACH;AAEA,YAAI;AACF,gBAAM,UACJ,UAAU,MAAM,UAAU,UAAU,MAAM,QAAQ,OAAO,EAAE;AAC7D,gBAAM,QAAQ,OAAO,WAAW,MAAM,EAAE,MAAM,CAAC;AAE/C,gBAAM,KAAK,KAAK,SAAS;AAAA,YACvB,WAAW,QAAQ,OAAO;AAAA,YAC1B,SAAS,QAAQ,OAAO;AAAA,UAC1B,CAAC;AAED,cAAI,QAAQ,OAAO,iBAAiB;AAClC,kBAAM,KAAK,gBAAgB,QAAQ,OAAO,iBAAiB;AAAA,cACzD,SAAS,QAAQ,OAAO;AAAA,YAC1B,CAAC;AAAA,UACH;AAEA,cAAI,QAAQ,OAAO,kBAAkB,GAAG;AACtC,kBAAM,IAAI;AAAA,cAAQ,CAAC,MACjB,WAAW,GAAG,QAAQ,OAAO,eAAe;AAAA,YAC9C;AAAA,UACF;AAEA,gBAAM,QAAQ,OAAO,cAAc,MAAM,EAAE,MAAM,CAAC;AAElD,gBAAM,OAAO,MAAM,KAAK,QAAQ;AAChC,gBAAM,IAAY,aAAK,IAAI;AAG3B,cAAI,iBAAiB,CAAC;AACtB,eAAIC,MAAA,QAAQ,UAAR,gBAAAA,IAAe,SAAS;AAC1B,6BAAiB,EAAE,SAAS,EACzB,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,KAAK,MAAM,CAAC,EAC/B,IAAI;AACP,gBAAI,MAAM,WAAW,eAAe,MAAM,aAAa,KAAK,KAAK,eAAe,MAAM,GAAG,EAAE,CAAC;AAAA,UAC9F;AAGA,qBAAW,OAAO,QAAQ,QAAQ,mBAAmB,CAAC;AACpD,cAAE,GAAG,EAAE,OAAO;AAGhB,gBAAM,gBAAgB,MAAM,QAAQ,QAAQ,QAAQ,YAAY,IAC5D,QAAQ,QAAQ,eAChB,CAAC,QAAQ,QAAQ,YAAY;AAEjC,cAAI,WAAW;AACf,qBAAW,OAAO,eAAe;AAC/B,gBAAI,CAAC,IAAK;AACV,kBAAM,OAAO,EAAE,GAAG,EAAE,MAAM;AAC1B,gBAAI,QAAQ,KAAK,QAAQ;AACvB,yBAAW,KAAK,KAAK;AACrB;AAAA,YACF;AAAA,UACF;AACA,cAAI,CAAC,UAAU;AACb,kBAAM,OAAO,EAAE,MAAM,EAAE,MAAM;AAC7B,uBAAW,KAAK,SAAS,KAAK,KAAK,IAAI,EAAE,MAAM,EAAE,KAAK;AAAA,UACxD;AAEA,gBAAM,SAAS,EAAE,OAAO,EAAE,KAAK,KAAK,IAAI,KAAK,KAAK;AAGlD,gBAAM,eAAe,SAAS,SAAS,YAAY,EAAE;AAGrD,gBAAM,YACJ,QAAQ,OAAO,SAAS,WACpB,KAAK,MAAM,KAAK,QAAQ,OAAO,QAAQ,iBAAiB,KAAK,CAAC,IAC9D,iBAAiB,KAAK;AAE5B,gBAAM,SAAS,gBAAgB,SAAS,KAAK;AAC7C,gBAAM,GAAG,MAAM,KAAK,QAAQ,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AAExD,gBAAM,cAAc,QAAQ,SAAS,iBACjC;AAAA,UAAgB,KAAK;AAAA,SAAY,KAAK;AAAA,iBAAmB,oBAAI,KAAK,GAAE,YAAY,CAAC;AAAA;AAAA;AAAA,IACjF;AAEJ,gBAAM,GAAG,UAAU,QAAQ,cAAc,cAAc,MAAM;AAG7D,gBAAM,OAAO,UAAU,KAAK,CAAC,MAAM,EAAE,SAAS,KAAK;AACnD,m
BAAS,KAAK;AAAA,YACZ;AAAA,YACA,QAAO,6BAAM,UAAS;AAAA,YACtB,UAAS,6BAAM,YAAW;AAAA,YAC1B,UAAU,CAAC,EAAC,6BAAM;AAAA,YAClB,OAAO,6BAAM;AAAA,YACb;AAAA,UACF,CAAC;AAED,cAAI,KAAK,YAAO,KAAK,OAAO,SAAS,EAAE;AAGvC,eAAIC,MAAA,QAAQ,UAAR,gBAAAA,IAAe,SAAS;AAC1B,uBAAW,QAAQ,gBAAgB;AACjC,oBAAM,IAAI,eAAe,MAAM;AAAA,gBAC7B,YAAY,QAAQ,MAAM;AAAA,cAC5B,CAAC;AACD,kBAAI,CAAC,EAAG;AAIR,kBAAI,eAAe;AACnB,kBACE,mBAAmB,OACnB,aAAa,WAAW,cAAc,GACtC;AACA,+BAAe,MAAM,aAAa,MAAM,eAAe,MAAM;AAC7D,+BACE,iBAAiB,OACb,MACA,aAAa,QAAQ,WAAW,GAAG;AAAA,cAC3C;AAEA,kBAAI,CAAC,QAAQ,IAAI,YAAY,KAAK,CAAC,WAAW,YAAY,GAAG;AAG3D,sBAAM,KAAK,EAAE,OAAO,cAAc,OAAO,GAAG,CAAC;AAAA,cAC/C;AAAA,YACF;AAAA,UACF;AAAA,QACF,SAAS,KAAK;AACZ,cAAI,KAAK,0BAAgB,KAAK,MAAK,2BAAK,YAAW,GAAG,EAAE;AAAA,QAC1D,UAAE;AACA,gBAAM,KAAK,MAAM;AAAA,QACnB;AAAA,MACF;AAEA,UAAI;AAEF,eAAO,MAAM,UAAU,SAAS,SAAS,UAAU;AAIjD,gBAAM,QAAQ,MAAM,OAAO,GAAG,WAAW,EAAE,IAAI,CAAC,SAAS;AACvD,kBAAM,QAAQ,KAAK,SAAS,IAAI,KAAK,QAAQ;AAC7C,mBAAO,EAAE,OAAO,KAAK,OAAO,MAAM;AAAA,UACpC,CAAC;AAED,gBAAM,QAAQ;AAAA,YACZ,MAAM,IAAI,OAAO,EAAE,OAAO,MAAM,MAAM;AApelD,kBAAAF,KAAAC;AAqec,oBAAID,MAAA,QAAQ,UAAR,gBAAAA,IAAe,YAAW,QAAQ,SAAU;AAChD,oBAAM,WAAW,KAAK;AAGtB,mBAAIC,MAAA,QAAQ,UAAR,gBAAAA,IAAe,SAAS;AAE1B,yBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,sBAAI,MAAM,CAAC,EAAE,UAAU,GAAI,OAAM,CAAC,EAAE,QAAQ,QAAQ;AAAA,gBACtD;AAAA,cACF;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAGA,cAAM,YACJ,QAAQ,OAAO,eAAa,sDAAgB,QAAhB,mBAAqB,SAAQ;AAG3D,cAAM,QAAQ,QAAQ,OAAO,OACzB,CAAC,GAAG,QAAQ,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,cAAc,EAAE,KAAK,CAAC,IAC3D;AAIJ,cAAM,YAAY,oBAAI,IAAI;AAE1B,cAAM,gBAAgB,CAAC;AAEvB,mBAAW,QAAQ,OAAO;AACxB,cAAI,KAAK,SAAU,eAAc,KAAK,IAAI;AAAA,eACrC;AACH,kBAAM,IAAI,KAAK,WAAW;AAC1B,sBAAU,IAAI,GAAG,CAAC,GAAI,UAAU,IAAI,CAAC,KAAK,CAAC,GAAI,IAAI,CAAC;AAAA,UACtD;AAAA,QACF;AAEA,YAAI,OAAO,KAAK,SAAS;AAAA;AAAA,IAAS,QAAQ,OAAO,WAAW;AAAA;AAAA;AAE5D,mBAAW,CAAC,SAAS,YAAY,KAAK,UAAU,QAAQ,GAAG;AACzD,kBAAQ,MAAM,OAAO;AAAA;AAAA;AACrB,qBAAW,MAAM,cAAc;AAC7B,kBAAM,OAAO,aAAa,GAAG,SAAS;AACtC,kBAAM,QAAQ,GAAG,SAAS,GAAG;AAC7B,kBAAM,QAAQ,GAAG,QAAQ,KAAK,GAA
G,KAAK,KAAK;AAC3C,oBAAQ,MAAM,KAAK,KAAK,IAAI,IAAI,KAAK;AAAA;AAAA,UACvC;AACA,kBAAQ;AAAA;AAAA,QACV;AAEA,YAAI,cAAc,QAAQ;AACxB,kBAAQ;AAAA;AAAA;AACR,qBAAW,MAAM,eAAe;AAC9B,kBAAM,OAAO,aAAa,GAAG,SAAS;AACtC,kBAAM,QAAQ,GAAG,SAAS,GAAG;AAC7B,kBAAM,QAAQ,GAAG,QAAQ,KAAK,GAAG,KAAK,KAAK;AAC3C,oBAAQ,MAAM,KAAK,KAAK,IAAI,IAAI,KAAK;AAAA;AAAA,UACvC;AACA,kBAAQ;AAAA;AAAA,QACV;AAEA,cAAM,WAAW,KAAK,KAAK,SAAS,QAAQ,OAAO,eAAe;AAClE,cAAM,GAAG,UAAU,UAAU,MAAM,MAAM;AAEzC,YAAI;AAAA,UACF;AAAA,oBAAuB,SAAS,MAAM,qBAAqB,QAAQ,OAAO,eAAe;AAAA;AAAA,QAC3F;AAAA,MACF,UAAE;AACA,cAAM,QAAQ,MAAM;AACpB,cAAM,oBAAoB,cAAc,UAAU;AAAA,MACpD;AAAA,IACF;AAAA,EACF;AACF;","names":["_a","_b","_c"]}
package/package.json ADDED
@@ -0,0 +1,68 @@
1
+ {
2
+ "name": "@happyalienai/vite-plugin-llm-spider",
3
+ "version": "0.1.0",
4
+ "description": "Vite plugin that generates LLM-friendly Markdown snapshots and llms.txt for SPAs",
5
+ "type": "module",
6
+ "main": "./dist/index.cjs",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "import": {
12
+ "types": "./dist/index.d.ts",
13
+ "default": "./dist/index.js"
14
+ },
15
+ "require": {
16
+ "types": "./dist/index.d.cts",
17
+ "default": "./dist/index.cjs"
18
+ }
19
+ }
20
+ },
21
+ "files": [
22
+ "dist"
23
+ ],
24
+ "scripts": {
25
+ "build": "tsup",
26
+ "prepublishOnly": "npm run build"
27
+ },
28
+ "keywords": [
29
+ "vite",
30
+ "vite-plugin",
31
+ "llm",
32
+ "llms.txt",
33
+ "markdown",
34
+ "ai",
35
+ "seo",
36
+ "spider",
37
+ "crawler"
38
+ ],
39
+ "author": {
40
+ "name": "Happy Alien AI",
41
+ "url": "https://happyalien.ai"
42
+ },
43
+ "license": "MIT",
44
+ "repository": {
45
+ "type": "git",
46
+ "url": "git+https://github.com/onEnterFrame/vite-plugin-llm-spider.git"
47
+ },
48
+ "bugs": {
49
+ "url": "https://github.com/onEnterFrame/vite-plugin-llm-spider/issues"
50
+ },
51
+ "homepage": "https://github.com/onEnterFrame/vite-plugin-llm-spider#readme",
52
+ "publishConfig": {
53
+ "access": "public"
54
+ },
55
+ "peerDependencies": {
56
+ "vite": "^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0"
57
+ },
58
+ "dependencies": {
59
+ "cheerio": "^1.0.0",
60
+ "puppeteer": "^24.0.0",
61
+ "turndown": "^7.2.0",
62
+ "turndown-plugin-gfm": "^1.0.2"
63
+ },
64
+ "devDependencies": {
65
+ "tsup": "^8.0.0",
66
+ "typescript": "^5.0.0"
67
+ }
68
+ }