@happyalienai/vite-plugin-llm-spider 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -0
- package/dist/index.cjs +260 -166
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +7 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +260 -166
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -5,7 +5,13 @@ import path from "path";
|
|
|
5
5
|
import * as cheerio from "cheerio";
|
|
6
6
|
import TurndownService from "turndown";
|
|
7
7
|
import { gfm } from "turndown-plugin-gfm";
|
|
8
|
-
|
|
8
|
+
var puppeteer = null;
|
|
9
|
+
async function loadPuppeteer() {
|
|
10
|
+
if (!puppeteer) {
|
|
11
|
+
puppeteer = await import("puppeteer");
|
|
12
|
+
}
|
|
13
|
+
return puppeteer.default || puppeteer;
|
|
14
|
+
}
|
|
9
15
|
function llmSpiderPlugin(userOptions = {}) {
|
|
10
16
|
let resolvedConfig;
|
|
11
17
|
function deepMerge(target, source) {
|
|
@@ -21,6 +27,11 @@ function llmSpiderPlugin(userOptions = {}) {
|
|
|
21
27
|
}
|
|
22
28
|
const defaults = {
|
|
23
29
|
enabled: true,
|
|
30
|
+
// Static mode: read HTML files directly from dist/ without browser
|
|
31
|
+
// - true: always use static mode (no Puppeteer)
|
|
32
|
+
// - false: always use browser rendering
|
|
33
|
+
// - "auto" (default): use static when crawl is disabled, browser when crawl is enabled
|
|
34
|
+
static: "auto",
|
|
24
35
|
// Recommended: explicit list
|
|
25
36
|
routes: (
|
|
26
37
|
/** @type {RouteDef[] | undefined} */
|
|
@@ -151,6 +162,11 @@ function llmSpiderPlugin(userOptions = {}) {
|
|
|
151
162
|
}
|
|
152
163
|
return path.join(distDir, rel);
|
|
153
164
|
}
|
|
165
|
+
function routeToHtmlFsPath(distDir, route) {
|
|
166
|
+
if (route === "/") return path.join(distDir, "index.html");
|
|
167
|
+
if (route.endsWith("/")) return path.join(distDir, route.slice(1), "index.html");
|
|
168
|
+
return path.join(distDir, route.slice(1) + ".html");
|
|
169
|
+
}
|
|
154
170
|
function makeLlmsLink(relMdPath) {
|
|
155
171
|
return relMdPath.replace(/\\/g, "/");
|
|
156
172
|
}
|
|
@@ -159,6 +175,12 @@ function llmSpiderPlugin(userOptions = {}) {
|
|
|
159
175
|
server.close((err) => err ? reject(err) : resolve());
|
|
160
176
|
});
|
|
161
177
|
}
|
|
178
|
+
function shouldUseStaticMode() {
|
|
179
|
+
var _a;
|
|
180
|
+
if (options.static === true) return true;
|
|
181
|
+
if (options.static === false) return false;
|
|
182
|
+
return !((_a = options.crawl) == null ? void 0 : _a.enabled);
|
|
183
|
+
}
|
|
162
184
|
return {
|
|
163
185
|
name: "vite-plugin-llm-spider",
|
|
164
186
|
apply: "build",
|
|
@@ -172,6 +194,7 @@ function llmSpiderPlugin(userOptions = {}) {
|
|
|
172
194
|
throw new Error("LLM Spider: missing resolved Vite config");
|
|
173
195
|
const distDir = resolvedConfig.build.outDir || "dist";
|
|
174
196
|
const basePath = (resolvedConfig.base || "/").replace(/\\/g, "/");
|
|
197
|
+
const useStaticMode = shouldUseStaticMode();
|
|
175
198
|
let routeDefs = [];
|
|
176
199
|
if (Array.isArray(options.routes) && options.routes.length) {
|
|
177
200
|
routeDefs = options.routes.map((r) => ({
|
|
@@ -186,98 +209,41 @@ function llmSpiderPlugin(userOptions = {}) {
|
|
|
186
209
|
} else {
|
|
187
210
|
routeDefs = [{ path: "/", section: "Pages" }];
|
|
188
211
|
}
|
|
189
|
-
log.info(
|
|
212
|
+
log.info(`
|
|
213
|
+
LLM Spider: generating markdown + llms.txt (${useStaticMode ? "static" : "browser"} mode)`);
|
|
190
214
|
log.debug("distDir:", distDir, "base:", basePath);
|
|
191
|
-
const previewServer = await preview({
|
|
192
|
-
root: resolvedConfig.root,
|
|
193
|
-
base: resolvedConfig.base,
|
|
194
|
-
build: { outDir: distDir },
|
|
195
|
-
preview: { port: 0, open: false, host: "127.0.0.1" },
|
|
196
|
-
configFile: false,
|
|
197
|
-
plugins: [],
|
|
198
|
-
// avoid loading user plugins again
|
|
199
|
-
logLevel: "silent"
|
|
200
|
-
});
|
|
201
|
-
await new Promise((resolve, reject) => {
|
|
202
|
-
const server = previewServer.httpServer;
|
|
203
|
-
if (server.listening) {
|
|
204
|
-
resolve();
|
|
205
|
-
} else {
|
|
206
|
-
server.once("listening", resolve);
|
|
207
|
-
server.once("error", reject);
|
|
208
|
-
setTimeout(() => reject(new Error("Preview server failed to start")), 5e3);
|
|
209
|
-
}
|
|
210
|
-
});
|
|
211
|
-
const addr = previewServer.httpServer.address();
|
|
212
|
-
if (!addr || typeof addr === "string") {
|
|
213
|
-
await safeCloseHttpServer(previewServer.httpServer);
|
|
214
|
-
throw new Error("LLM Spider: could not determine preview server port");
|
|
215
|
-
}
|
|
216
|
-
const normalizedBase = basePath.endsWith("/") ? basePath : basePath + "/";
|
|
217
|
-
const baseUrl = `http://127.0.0.1:${addr.port}${normalizedBase}`;
|
|
218
|
-
log.debug("Preview server at:", baseUrl);
|
|
219
|
-
const browser = await puppeteer.launch(options.render.launchOptions);
|
|
220
215
|
const turndown = new TurndownService(options.markdown.turndown);
|
|
221
216
|
turndown.use(gfm);
|
|
222
|
-
const visited = /* @__PURE__ */ new Set();
|
|
223
217
|
const captured = [];
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
for (const
|
|
227
|
-
const
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
(p) => p instanceof RegExp ? p.test(url) : url.includes(p)
|
|
251
|
-
);
|
|
252
|
-
if (blocked) req.abort();
|
|
253
|
-
else req.continue();
|
|
254
|
-
});
|
|
255
|
-
}
|
|
256
|
-
try {
|
|
257
|
-
const pageUrl = route === "/" ? baseUrl : baseUrl + route.replace(/^\//, "");
|
|
258
|
-
await options.render.beforeGoto(page, { route });
|
|
259
|
-
await page.goto(pageUrl, {
|
|
260
|
-
waitUntil: options.render.waitUntil,
|
|
261
|
-
timeout: options.render.timeoutMs
|
|
262
|
-
});
|
|
263
|
-
if (options.render.waitForSelector) {
|
|
264
|
-
await page.waitForSelector(options.render.waitForSelector, {
|
|
265
|
-
timeout: options.render.timeoutMs
|
|
266
|
-
});
|
|
267
|
-
}
|
|
268
|
-
if (options.render.postLoadDelayMs > 0) {
|
|
269
|
-
await new Promise(
|
|
270
|
-
(r) => setTimeout(r, options.render.postLoadDelayMs)
|
|
271
|
-
);
|
|
272
|
-
}
|
|
273
|
-
await options.render.beforeExtract(page, { route });
|
|
274
|
-
const html = await page.content();
|
|
275
|
-
const $ = cheerio.load(html);
|
|
276
|
-
let harvestedHrefs = [];
|
|
277
|
-
if ((_b2 = options.crawl) == null ? void 0 : _b2.enabled) {
|
|
278
|
-
harvestedHrefs = $("a[href]").map((_, a) => $(a).attr("href")).get();
|
|
279
|
-
log.debug(` Found ${harvestedHrefs.length} links on ${route}:`, harvestedHrefs.slice(0, 15));
|
|
218
|
+
if (useStaticMode) {
|
|
219
|
+
log.debug("Using static mode - reading HTML files directly from dist/");
|
|
220
|
+
for (const rd of routeDefs) {
|
|
221
|
+
const route = rd.path;
|
|
222
|
+
if (isExcluded(route)) continue;
|
|
223
|
+
let htmlPath = routeToHtmlFsPath(distDir, route);
|
|
224
|
+
let htmlContent = null;
|
|
225
|
+
try {
|
|
226
|
+
htmlContent = await fs.readFile(htmlPath, "utf8");
|
|
227
|
+
} catch {
|
|
228
|
+
if (!route.endsWith("/") && route !== "/") {
|
|
229
|
+
const altPath = path.join(distDir, route.slice(1), "index.html");
|
|
230
|
+
try {
|
|
231
|
+
htmlContent = await fs.readFile(altPath, "utf8");
|
|
232
|
+
htmlPath = altPath;
|
|
233
|
+
} catch {
|
|
234
|
+
try {
|
|
235
|
+
htmlContent = await fs.readFile(path.join(distDir, "index.html"), "utf8");
|
|
236
|
+
htmlPath = path.join(distDir, "index.html");
|
|
237
|
+
log.debug(` Using SPA fallback index.html for ${route}`);
|
|
238
|
+
} catch {
|
|
239
|
+
log.warn(` \u26A0\uFE0F No HTML found for ${route}`);
|
|
240
|
+
continue;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
}
|
|
280
244
|
}
|
|
245
|
+
if (!htmlContent) continue;
|
|
246
|
+
const $ = cheerio.load(htmlContent);
|
|
281
247
|
for (const sel of options.extract.removeSelectors || [])
|
|
282
248
|
$(sel).remove();
|
|
283
249
|
const mainSelectors = Array.isArray(options.extract.mainSelector) ? options.extract.mainSelector : [options.extract.mainSelector];
|
|
@@ -307,112 +273,240 @@ generated_at: ${(/* @__PURE__ */ new Date()).toISOString()}
|
|
|
307
273
|
|
|
308
274
|
` : "";
|
|
309
275
|
await fs.writeFile(fsPath, frontmatter + markdownBody, "utf8");
|
|
310
|
-
const meta = routeDefs.find((r) => r.path === route);
|
|
311
276
|
captured.push({
|
|
312
277
|
route,
|
|
313
|
-
title:
|
|
314
|
-
section:
|
|
315
|
-
optional: !!
|
|
316
|
-
notes:
|
|
278
|
+
title: rd.title || title,
|
|
279
|
+
section: rd.section || "Pages",
|
|
280
|
+
optional: !!rd.optional,
|
|
281
|
+
notes: rd.notes,
|
|
317
282
|
mdRelPath
|
|
318
283
|
});
|
|
319
284
|
log.info(` \u2705 ${route} -> ${mdRelPath}`);
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
285
|
+
}
|
|
286
|
+
} else {
|
|
287
|
+
const previewServer = await preview({
|
|
288
|
+
root: resolvedConfig.root,
|
|
289
|
+
base: resolvedConfig.base,
|
|
290
|
+
build: { outDir: distDir },
|
|
291
|
+
preview: { port: 0, open: false, host: "127.0.0.1" },
|
|
292
|
+
configFile: false,
|
|
293
|
+
plugins: [],
|
|
294
|
+
logLevel: "silent"
|
|
295
|
+
});
|
|
296
|
+
await new Promise((resolve, reject) => {
|
|
297
|
+
const server = previewServer.httpServer;
|
|
298
|
+
if (server.listening) {
|
|
299
|
+
resolve();
|
|
300
|
+
} else {
|
|
301
|
+
server.once("listening", resolve);
|
|
302
|
+
server.once("error", reject);
|
|
303
|
+
setTimeout(() => reject(new Error("Preview server failed to start")), 5e3);
|
|
304
|
+
}
|
|
305
|
+
});
|
|
306
|
+
const addr = previewServer.httpServer.address();
|
|
307
|
+
if (!addr || typeof addr === "string") {
|
|
308
|
+
await safeCloseHttpServer(previewServer.httpServer);
|
|
309
|
+
throw new Error("LLM Spider: could not determine preview server port");
|
|
310
|
+
}
|
|
311
|
+
const normalizedBase = basePath.endsWith("/") ? basePath : basePath + "/";
|
|
312
|
+
const baseUrl = `http://127.0.0.1:${addr.port}${normalizedBase}`;
|
|
313
|
+
log.debug("Preview server at:", baseUrl);
|
|
314
|
+
const pup = await loadPuppeteer();
|
|
315
|
+
const browser = await pup.launch(options.render.launchOptions);
|
|
316
|
+
const visited = /* @__PURE__ */ new Set();
|
|
317
|
+
const queue = [];
|
|
318
|
+
if ((_b = options.crawl) == null ? void 0 : _b.enabled) {
|
|
319
|
+
for (const seed of options.crawl.seeds || ["/"]) {
|
|
320
|
+
const nr = normalizeRoute(seed, {
|
|
321
|
+
stripQuery: options.crawl.stripQuery
|
|
322
|
+
});
|
|
323
|
+
if (nr) queue.push({ route: nr, depth: 0 });
|
|
324
|
+
}
|
|
325
|
+
} else {
|
|
326
|
+
for (const rd of routeDefs) queue.push({ route: rd.path, depth: 0 });
|
|
327
|
+
}
|
|
328
|
+
const maxDepth = ((_c = options.crawl) == null ? void 0 : _c.enabled) ? options.crawl.maxDepth : 0;
|
|
329
|
+
const maxPages = ((_d = options.crawl) == null ? void 0 : _d.enabled) ? options.crawl.maxPages : queue.length;
|
|
330
|
+
const concurrency = ((_e = options.crawl) == null ? void 0 : _e.enabled) ? options.crawl.concurrency : 3;
|
|
331
|
+
async function captureOne(route) {
|
|
332
|
+
var _a2, _b2, _c2;
|
|
333
|
+
if (visited.has(route)) return;
|
|
334
|
+
if (isExcluded(route)) return;
|
|
335
|
+
if (captured.length >= maxPages) return;
|
|
336
|
+
visited.add(route);
|
|
337
|
+
const page = await browser.newPage();
|
|
338
|
+
if ((_a2 = options.render.blockRequests) == null ? void 0 : _a2.length) {
|
|
339
|
+
await page.setRequestInterception(true);
|
|
340
|
+
page.on("request", (req) => {
|
|
341
|
+
const url = req.url();
|
|
342
|
+
const blocked = options.render.blockRequests.some(
|
|
343
|
+
(p) => p instanceof RegExp ? p.test(url) : url.includes(p)
|
|
344
|
+
);
|
|
345
|
+
if (blocked) req.abort();
|
|
346
|
+
else req.continue();
|
|
347
|
+
});
|
|
348
|
+
}
|
|
349
|
+
try {
|
|
350
|
+
const pageUrl = route === "/" ? baseUrl : baseUrl + route.replace(/^\//, "");
|
|
351
|
+
await options.render.beforeGoto(page, { route });
|
|
352
|
+
await page.goto(pageUrl, {
|
|
353
|
+
waitUntil: options.render.waitUntil,
|
|
354
|
+
timeout: options.render.timeoutMs
|
|
355
|
+
});
|
|
356
|
+
if (options.render.waitForSelector) {
|
|
357
|
+
await page.waitForSelector(options.render.waitForSelector, {
|
|
358
|
+
timeout: options.render.timeoutMs
|
|
324
359
|
});
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
360
|
+
}
|
|
361
|
+
if (options.render.postLoadDelayMs > 0) {
|
|
362
|
+
await new Promise(
|
|
363
|
+
(r) => setTimeout(r, options.render.postLoadDelayMs)
|
|
364
|
+
);
|
|
365
|
+
}
|
|
366
|
+
await options.render.beforeExtract(page, { route });
|
|
367
|
+
const html = await page.content();
|
|
368
|
+
const $ = cheerio.load(html);
|
|
369
|
+
let harvestedHrefs = [];
|
|
370
|
+
if ((_b2 = options.crawl) == null ? void 0 : _b2.enabled) {
|
|
371
|
+
harvestedHrefs = $("a[href]").map((_, a) => $(a).attr("href")).get();
|
|
372
|
+
log.debug(` Found ${harvestedHrefs.length} links on ${route}:`, harvestedHrefs.slice(0, 15));
|
|
373
|
+
}
|
|
374
|
+
for (const sel of options.extract.removeSelectors || [])
|
|
375
|
+
$(sel).remove();
|
|
376
|
+
const mainSelectors = Array.isArray(options.extract.mainSelector) ? options.extract.mainSelector : [options.extract.mainSelector];
|
|
377
|
+
let mainHtml = null;
|
|
378
|
+
for (const sel of mainSelectors) {
|
|
379
|
+
if (!sel) continue;
|
|
380
|
+
const node = $(sel).first();
|
|
381
|
+
if (node && node.length) {
|
|
382
|
+
mainHtml = node.html();
|
|
383
|
+
break;
|
|
330
384
|
}
|
|
331
|
-
|
|
332
|
-
|
|
385
|
+
}
|
|
386
|
+
if (!mainHtml) {
|
|
387
|
+
const main = $("main").first();
|
|
388
|
+
mainHtml = main.length ? main.html() : $("body").html();
|
|
389
|
+
}
|
|
390
|
+
const title = ($("title").text() || "").trim() || route;
|
|
391
|
+
const markdownBody = turndown.turndown(mainHtml || "");
|
|
392
|
+
const mdRelPath = options.output.mode === "subdir" ? path.posix.join(options.output.subdir, routeToMdWebPath(route)) : routeToMdWebPath(route);
|
|
393
|
+
const fsPath = routeToMdFsPath(distDir, route);
|
|
394
|
+
await fs.mkdir(path.dirname(fsPath), { recursive: true });
|
|
395
|
+
const frontmatter = options.markdown.addFrontmatter ? `---
|
|
396
|
+
source: ${route}
|
|
397
|
+
title: ${title}
|
|
398
|
+
generated_at: ${(/* @__PURE__ */ new Date()).toISOString()}
|
|
399
|
+
---
|
|
400
|
+
|
|
401
|
+
` : "";
|
|
402
|
+
await fs.writeFile(fsPath, frontmatter + markdownBody, "utf8");
|
|
403
|
+
const meta = routeDefs.find((r) => r.path === route);
|
|
404
|
+
captured.push({
|
|
405
|
+
route,
|
|
406
|
+
title: (meta == null ? void 0 : meta.title) || title,
|
|
407
|
+
section: (meta == null ? void 0 : meta.section) || "Pages",
|
|
408
|
+
optional: !!(meta == null ? void 0 : meta.optional),
|
|
409
|
+
notes: meta == null ? void 0 : meta.notes,
|
|
410
|
+
mdRelPath
|
|
411
|
+
});
|
|
412
|
+
log.info(` \u2705 ${route} -> ${mdRelPath}`);
|
|
413
|
+
if ((_c2 = options.crawl) == null ? void 0 : _c2.enabled) {
|
|
414
|
+
for (const href of harvestedHrefs) {
|
|
415
|
+
const n = normalizeRoute(href, {
|
|
416
|
+
stripQuery: options.crawl.stripQuery
|
|
417
|
+
});
|
|
418
|
+
if (!n) continue;
|
|
419
|
+
let baseRelative = n;
|
|
420
|
+
if (normalizedBase !== "/" && baseRelative.startsWith(normalizedBase)) {
|
|
421
|
+
baseRelative = "/" + baseRelative.slice(normalizedBase.length);
|
|
422
|
+
baseRelative = baseRelative === "//" ? "/" : baseRelative.replace(/\/{2,}/g, "/");
|
|
423
|
+
}
|
|
424
|
+
if (!visited.has(baseRelative) && !isExcluded(baseRelative)) {
|
|
425
|
+
queue.push({ route: baseRelative, depth: -1 });
|
|
426
|
+
}
|
|
333
427
|
}
|
|
334
428
|
}
|
|
429
|
+
} catch (err) {
|
|
430
|
+
log.warn(` \u26A0\uFE0F failed ${route}: ${(err == null ? void 0 : err.message) || err}`);
|
|
431
|
+
} finally {
|
|
432
|
+
await page.close();
|
|
335
433
|
}
|
|
336
|
-
} catch (err) {
|
|
337
|
-
log.warn(` \u26A0\uFE0F failed ${route}: ${(err == null ? void 0 : err.message) || err}`);
|
|
338
|
-
} finally {
|
|
339
|
-
await page.close();
|
|
340
434
|
}
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
435
|
+
try {
|
|
436
|
+
while (queue.length && captured.length < maxPages) {
|
|
437
|
+
const batch = queue.splice(0, concurrency).map((item) => {
|
|
438
|
+
const depth = item.depth >= 0 ? item.depth : 1;
|
|
439
|
+
return { route: item.route, depth };
|
|
440
|
+
});
|
|
441
|
+
await Promise.all(
|
|
442
|
+
batch.map(async ({ route, depth }) => {
|
|
443
|
+
var _a2, _b2;
|
|
444
|
+
if (((_a2 = options.crawl) == null ? void 0 : _a2.enabled) && depth > maxDepth) return;
|
|
445
|
+
await captureOne(route);
|
|
446
|
+
if ((_b2 = options.crawl) == null ? void 0 : _b2.enabled) {
|
|
447
|
+
for (let i = 0; i < queue.length; i++) {
|
|
448
|
+
if (queue[i].depth === -1) queue[i].depth = depth + 1;
|
|
449
|
+
}
|
|
356
450
|
}
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
);
|
|
360
|
-
}
|
|
361
|
-
const llmsTitle = options.output.llmsTitle || ((_f = resolvedConfig == null ? void 0 : resolvedConfig.env) == null ? void 0 : _f.mode) || "Site";
|
|
362
|
-
const items = options.output.sort ? [...captured].sort((a, b) => a.route.localeCompare(b.route)) : captured;
|
|
363
|
-
const bySection = /* @__PURE__ */ new Map();
|
|
364
|
-
const optionalItems = [];
|
|
365
|
-
for (const item of items) {
|
|
366
|
-
if (item.optional) optionalItems.push(item);
|
|
367
|
-
else {
|
|
368
|
-
const s = item.section || "Pages";
|
|
369
|
-
bySection.set(s, [...bySection.get(s) || [], item]);
|
|
451
|
+
})
|
|
452
|
+
);
|
|
370
453
|
}
|
|
454
|
+
} finally {
|
|
455
|
+
await browser.close();
|
|
456
|
+
await safeCloseHttpServer(previewServer.httpServer);
|
|
371
457
|
}
|
|
372
|
-
|
|
458
|
+
}
|
|
459
|
+
const llmsTitle = options.output.llmsTitle || ((_f = resolvedConfig == null ? void 0 : resolvedConfig.env) == null ? void 0 : _f.mode) || "Site";
|
|
460
|
+
const items = options.output.sort ? [...captured].sort((a, b) => a.route.localeCompare(b.route)) : captured;
|
|
461
|
+
const bySection = /* @__PURE__ */ new Map();
|
|
462
|
+
const optionalItems = [];
|
|
463
|
+
for (const item of items) {
|
|
464
|
+
if (item.optional) optionalItems.push(item);
|
|
465
|
+
else {
|
|
466
|
+
const s = item.section || "Pages";
|
|
467
|
+
bySection.set(s, [...bySection.get(s) || [], item]);
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
let llms = `# ${llmsTitle}
|
|
373
471
|
|
|
374
472
|
> ${options.output.llmsSummary}
|
|
375
473
|
|
|
376
474
|
`;
|
|
377
|
-
|
|
378
|
-
|
|
475
|
+
for (const [section, sectionItems] of bySection.entries()) {
|
|
476
|
+
llms += `## ${section}
|
|
379
477
|
|
|
380
478
|
`;
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
`;
|
|
387
|
-
}
|
|
388
|
-
llms += `
|
|
479
|
+
for (const it of sectionItems) {
|
|
480
|
+
const link = makeLlmsLink(it.mdRelPath);
|
|
481
|
+
const label = it.title || it.route;
|
|
482
|
+
const notes = it.notes ? `: ${it.notes}` : "";
|
|
483
|
+
llms += `- [${label}](${link})${notes}
|
|
389
484
|
`;
|
|
390
485
|
}
|
|
391
|
-
|
|
392
|
-
llms += `## Optional
|
|
393
|
-
|
|
486
|
+
llms += `
|
|
394
487
|
`;
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
llms += `- [${label}](${link})${notes}
|
|
488
|
+
}
|
|
489
|
+
if (optionalItems.length) {
|
|
490
|
+
llms += `## Optional
|
|
491
|
+
|
|
400
492
|
`;
|
|
401
|
-
|
|
402
|
-
|
|
493
|
+
for (const it of optionalItems) {
|
|
494
|
+
const link = makeLlmsLink(it.mdRelPath);
|
|
495
|
+
const label = it.title || it.route;
|
|
496
|
+
const notes = it.notes ? `: ${it.notes}` : "";
|
|
497
|
+
llms += `- [${label}](${link})${notes}
|
|
403
498
|
`;
|
|
404
499
|
}
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
500
|
+
llms += `
|
|
501
|
+
`;
|
|
502
|
+
}
|
|
503
|
+
const llmsPath = path.join(distDir, options.output.llmsTxtFileName);
|
|
504
|
+
await fs.writeFile(llmsPath, llms, "utf8");
|
|
505
|
+
log.info(
|
|
506
|
+
`
|
|
409
507
|
LLM Spider: wrote ${captured.length} markdown pages + ${options.output.llmsTxtFileName}
|
|
410
508
|
`
|
|
411
|
-
|
|
412
|
-
} finally {
|
|
413
|
-
await browser.close();
|
|
414
|
-
await safeCloseHttpServer(previewServer.httpServer);
|
|
415
|
-
}
|
|
509
|
+
);
|
|
416
510
|
}
|
|
417
511
|
};
|
|
418
512
|
}
|