@happyalienai/vite-plugin-llm-spider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,225 @@
1
+ # vite-plugin-llm-spider
2
+
3
+ > **Built by [Happy Alien AI](https://happyalien.ai)** – AI-powered tools for eLearning creators.
4
+
5
+ A Vite build plugin that generates **LLM-friendly Markdown snapshots** of selected public routes and publishes a curated index at **`/llms.txt`**.
6
+
7
+ It makes SPAs and content-heavy Vite apps easier for AI agents and tools to understand by providing clean, low-noise text renditions of key pages plus a deterministic index.
8
+
9
+ ## Features
10
+
11
+ - 🕷️ **Two discovery modes:** explicit route list (recommended) or controlled BFS crawl
12
+ - 📝 **Markdown output:** clean, readable `.md` files following the [llms.txt spec](https://llmstxt.org/)
13
+ - 🧹 **Noise removal:** strips nav, footer, modals, cookie banners, etc.
14
+ - 🔒 **Safe by default:** explicit excludes, no auth pages by accident
15
+ - ⚡ **Works with any Vite framework:** Vue, React, Svelte, Solid, etc.
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ npm i -D @happyalienai/vite-plugin-llm-spider
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ```js
26
+ // vite.config.js
27
+ import { defineConfig } from "vite";
28
+ import llmSpider from "@happyalienai/vite-plugin-llm-spider";
29
+
30
+ export default defineConfig({
31
+ plugins: [
32
+ llmSpider({
33
+ routes: [
34
+ { path: "/", title: "Home", section: "Product" },
35
+ { path: "/pricing", title: "Pricing", section: "Product" },
36
+ { path: "/docs/", title: "Docs", section: "Docs", optional: true },
37
+ ],
38
+ exclude: ["/login", "/admin", "/account"],
39
+ render: {
40
+ waitForSelector: "main",
41
+ },
42
+ }),
43
+ ],
44
+ });
45
+ ```
46
+
47
+ After `npm run build`, you'll get:
48
+
49
+ - `dist/llms.txt` – curated index
50
+ - `dist/index.html.md` – home page
51
+ - `dist/pricing.md` – pricing page
52
+ - `dist/docs/index.html.md` – docs page
53
+
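+ To spot-check the result, you can print the generated index after a build. A minimal throwaway script (the filename is arbitrary; the path assumes the default `dist` out dir):
+
+ ```js
+ // print-llms.mjs: run with `node print-llms.mjs` after `npm run build`
+ import { readFile } from "node:fs/promises";
+
+ console.log(await readFile("dist/llms.txt", "utf8"));
+ ```
+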
54
+ ## Output Format
55
+
56
+ The generated `llms.txt` follows the [llmstxt.org](https://llmstxt.org/) spec:
57
+
58
+ ```markdown
59
+ # My Site
60
+
61
+ > LLM-friendly index of important pages and their Markdown equivalents.
62
+
63
+ ## Product
64
+
65
+ - [Home](index.html.md)
66
+ - [Pricing](pricing.md)
67
+
68
+ ## Optional
69
+
70
+ - [Docs](docs/index.html.md)
71
+ ```
72
+
73
+ ## Configuration
74
+
75
+ ### Route Definitions
76
+
77
+ ```js
78
+ routes: [
79
+ {
80
+ path: "/pricing", // URL path (required)
81
+ title: "Pricing", // Display title in llms.txt
82
+ section: "Product", // H2 section grouping
83
+ optional: false, // If true, goes under "## Optional"
84
+ notes: "Updated weekly" // Appended to link in llms.txt
85
+ }
86
+ ]
87
+ ```
88
+
89
+ ### Crawl Mode (opt-in)
90
+
91
+ ```js
92
+ llmSpider({
93
+ crawl: {
94
+ enabled: true,
95
+ seeds: ["/"],
96
+ maxDepth: 2,
97
+ maxPages: 50,
98
+ concurrency: 3,
99
+ },
100
+ exclude: ["/login", "/admin"],
101
+ })
102
+ ```
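+
+ Entries in `exclude` may be plain strings (matched as substrings) or regular expressions, which is useful for keeping whole subtrees out of a crawl. A minimal sketch:
+
+ ```js
+ llmSpider({
+   crawl: { enabled: true, seeds: ["/"], maxDepth: 2, maxPages: 50 },
+   exclude: [
+     "/login",          // substring match
+     /^\/admin(\/|$)/,  // regex match: /admin and everything below it
+   ],
+ })
+ ```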
103
+
104
+ ### Rendering Options
105
+
106
+ ```js
107
+ render: {
108
+ waitUntil: "networkidle2", // Puppeteer wait strategy
109
+ timeoutMs: 30_000, // Page load timeout
110
+ waitForSelector: "main", // Wait for element before extracting
111
+ postLoadDelayMs: 200, // Extra delay after load
112
+ blockRequests: [ // Block analytics/trackers
113
+ /google-analytics\.com/i,
114
+ /hotjar\.com/i,
115
+ ],
116
+ launchOptions: { // Puppeteer launch options
117
+ headless: "new",
118
+ args: ["--no-sandbox"], // For CI/Docker
119
+ },
120
+ }
121
+ ```
122
+
123
+ ### Extraction Options
124
+
125
+ ```js
126
+ extract: {
127
+ mainSelector: ["main", "#content", "[data-main]"], // Content selectors (first match wins)
128
+ removeSelectors: [ // Elements to strip
129
+ "nav", "header", "footer", "svg", ".modal", ".cookie-banner"
130
+ ],
131
+ }
132
+ ```
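+
+ If none of the `mainSelector` entries match, the plugin falls back to the page's `<main>` element and, failing that, to `<body>`.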
133
+
134
+ ### Output Options
135
+
136
+ ```js
137
+ output: {
138
+ mode: "sibling", // "sibling" (default) or "subdir"
139
+ subdir: "ai", // Subdir name when mode="subdir"
140
+ llmsTxtFileName: "llms.txt", // Index filename
141
+ llmsTitle: "My App", // H1 title
142
+ llmsSummary: "AI-friendly pages", // Summary blockquote
143
+ sort: true, // Alphabetical ordering
144
+ }
145
+ ```
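+
+ With `mode: "subdir"` (and the default `subdir: "ai"`), the Markdown files are written under `dist/ai/` (for example `dist/ai/pricing.md`) and the links in `llms.txt` point there; the index itself is still written to `dist/llms.txt`.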
146
+
147
+ ### Markdown Options
148
+
149
+ ```js
150
+ markdown: {
151
+ addFrontmatter: true, // Add YAML frontmatter with source/title/date
152
+ turndown: { // Turndown options
153
+ headingStyle: "atx",
154
+ codeBlockStyle: "fenced",
155
+ },
156
+ }
157
+ ```
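+
+ With `addFrontmatter: true`, each generated file begins with a small YAML block of this shape (values illustrative):
+
+ ```markdown
+ ---
+ source: /pricing
+ title: Pricing
+ generated_at: 2025-01-01T00:00:00.000Z
+ ---
+ ```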
158
+
159
+ ## URL Mapping
160
+
161
+ Following the llms.txt spec:
162
+
163
+ | Route | Output File |
164
+ |-------|-------------|
165
+ | `/` | `index.html.md` |
166
+ | `/pricing` | `pricing.md` |
167
+ | `/docs/` | `docs/index.html.md` |
168
+ | `/docs/api` | `docs/api.md` |
169
+
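+ In code, the mapping is essentially the following helper (simplified from the plugin source):
+
+ ```js
+ // Simplified route -> Markdown path mapping.
+ function routeToMdWebPath(route) {
+   if (route === "/") return "index.html.md";                        // site root
+   if (route.endsWith("/")) return route.slice(1) + "index.html.md"; // directory-style routes
+   return route.slice(1) + ".md";                                    // leaf routes
+ }
+ ```
+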
170
+ ## Hooks
171
+
172
+ ```js
173
+ render: {
174
+ async beforeGoto(page, { route }) {
175
+ // Inject auth token for protected pages (use carefully!)
176
+ await page.evaluate(() => {
177
+ localStorage.setItem("token", "dev-token");
178
+ });
179
+ },
180
+ async beforeExtract(page, { route }) {
181
+ // Custom cleanup before extraction
182
+ },
183
+ }
184
+ ```
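+
+ Note that `beforeGoto` runs while the page is still on `about:blank`, so anything written to `localStorage` there belongs to that blank page rather than to your app's origin; if you need per-origin state in place before the first load, Puppeteer's `page.evaluateOnNewDocument` is usually a better fit.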
185
+
186
+ ## CI/Docker
187
+
188
+ For headless environments:
189
+
190
+ ```js
191
+ render: {
192
+ launchOptions: {
193
+ headless: "new",
194
+ args: ["--no-sandbox", "--disable-setuid-sandbox"],
195
+ },
196
+ }
197
+ ```
198
+
199
+ ## Troubleshooting
200
+
201
+ ### Timeouts
202
+ - Use `waitForSelector: "main"` instead of relying on `networkidle`
203
+ - Increase `timeoutMs` or add `postLoadDelayMs`
204
+
205
+ ### Output is mostly nav/footer
206
+ - Tighten `mainSelector` to your content wrapper
207
+ - Add more `removeSelectors`
208
+
209
+ ### CI fails to launch browser
210
+ - Add `--no-sandbox` to launch args
211
+ - Ensure Puppeteer dependencies are installed
212
+
213
+ ## License
214
+
215
+ MIT
216
+
217
+ ---
218
+
219
+ <p align="center">
220
+ <a href="https://happyalien.ai">
221
+ <strong>Happy Alien AI</strong>
222
+ </a>
223
+ <br>
224
+ AI-powered tools for instructional designers and eLearning teams
225
+ </p>
package/dist/index.cjs ADDED
@@ -0,0 +1,457 @@
1
+ var __create = Object.create;
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __getProtoOf = Object.getPrototypeOf;
6
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
7
+ var __export = (target, all) => {
8
+ for (var name in all)
9
+ __defProp(target, name, { get: all[name], enumerable: true });
10
+ };
11
+ var __copyProps = (to, from, except, desc) => {
12
+ if (from && typeof from === "object" || typeof from === "function") {
13
+ for (let key of __getOwnPropNames(from))
14
+ if (!__hasOwnProp.call(to, key) && key !== except)
15
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
16
+ }
17
+ return to;
18
+ };
19
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
20
+ // If the importer is in node compatibility mode or this is not an ESM
21
+ // file that has been converted to a CommonJS file using a Babel-
22
+ // compatible transform (i.e. "__esModule" has not been set), then set
23
+ // "default" to the CommonJS "module.exports" for node compatibility.
24
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
25
+ mod
26
+ ));
27
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
28
+
29
+ // src/index.js
30
+ var index_exports = {};
31
+ __export(index_exports, {
32
+ default: () => llmSpiderPlugin,
33
+ llmSpiderPlugin: () => llmSpiderPlugin
34
+ });
35
+ module.exports = __toCommonJS(index_exports);
36
+ var import_vite = require("vite");
37
+ var import_promises = __toESM(require("fs/promises"), 1);
38
+ var import_node_path = __toESM(require("path"), 1);
39
+ var cheerio = __toESM(require("cheerio"), 1);
40
+ var import_turndown = __toESM(require("turndown"), 1);
41
+ var import_turndown_plugin_gfm = require("turndown-plugin-gfm");
42
+ var import_puppeteer = __toESM(require("puppeteer"), 1);
43
+ function llmSpiderPlugin(userOptions = {}) {
44
+ let resolvedConfig;
45
+ function deepMerge(target, source) {
46
+ const result = { ...target };
47
+ for (const key of Object.keys(source)) {
48
+ if (source[key] && typeof source[key] === "object" && !Array.isArray(source[key]) && !(source[key] instanceof RegExp)) {
49
+ result[key] = deepMerge(target[key] || {}, source[key]);
50
+ } else {
51
+ result[key] = source[key];
52
+ }
53
+ }
54
+ return result;
55
+ }
56
+ const defaults = {
57
+ enabled: true,
58
+ // Recommended: explicit list
59
+ routes: (
60
+ /** @type {RouteDef[] | undefined} */
61
+ void 0
62
+ ),
63
+ // Optional crawl mode (off by default)
64
+ crawl: {
65
+ enabled: false,
66
+ seeds: ["/"],
67
+ maxDepth: 2,
68
+ maxPages: 50,
69
+ concurrency: 3,
70
+ stripQuery: true
71
+ },
72
+ exclude: ["/login", "/admin", "/account"],
73
+ render: {
74
+ waitUntil: "networkidle2",
75
+ // more forgiving than networkidle0 for SPAs
76
+ timeoutMs: 3e4,
77
+ waitForSelector: null,
78
+ // e.g. "main" or "#app main"
79
+ postLoadDelayMs: 0,
80
+ blockRequests: [
81
+ /google-analytics\.com/i,
82
+ /googletagmanager\.com/i,
83
+ /segment\.com/i,
84
+ /hotjar\.com/i
85
+ ],
86
+ launchOptions: {
87
+ headless: "new"
88
+ // For CI containers you may need:
89
+ // args: ["--no-sandbox", "--disable-setuid-sandbox"],
90
+ },
91
+ /**
92
+ * @param {import('puppeteer').Page} _page
93
+ * @param {{ route: string }} _ctx
94
+ */
95
+ beforeGoto: async (_page, _ctx) => {
96
+ },
97
+ /**
98
+ * @param {import('puppeteer').Page} _page
99
+ * @param {{ route: string }} _ctx
100
+ */
101
+ beforeExtract: async (_page, _ctx) => {
102
+ }
103
+ },
104
+ extract: {
105
+ mainSelector: ["main", "#main-content", "[data-main]"],
106
+ removeSelectors: [
107
+ "script",
108
+ "style",
109
+ "noscript",
110
+ "nav",
111
+ "header",
112
+ "footer",
113
+ "svg",
114
+ "iframe",
115
+ "[role='alert']",
116
+ ".cookie",
117
+ ".cookie-banner",
118
+ ".modal"
119
+ ]
120
+ },
121
+ markdown: {
122
+ addFrontmatter: true,
123
+ turndown: {
124
+ headingStyle: "atx",
125
+ codeBlockStyle: "fenced",
126
+ emDelimiter: "_"
127
+ }
128
+ },
129
+ output: {
130
+ // "sibling" => /pricing -> pricing.md ; /docs/ -> docs/index.html.md ; / -> index.html.md
131
+ mode: "sibling",
132
+ subdir: "ai",
133
+ // used only when mode === "subdir"
134
+ llmsTxtFileName: "llms.txt",
135
+ llmsTitle: null,
136
+ // defaults to package name or project dir
137
+ llmsSummary: "LLM-friendly index of important pages and their Markdown equivalents.",
138
+ sort: true
139
+ },
140
+ logLevel: "info"
141
+ // "silent" | "info" | "debug"
142
+ };
143
+ const options = deepMerge(defaults, userOptions);
144
+ const log = {
145
+ info: (...args) => options.logLevel === "info" || options.logLevel === "debug" ? console.log(...args) : void 0,
146
+ debug: (...args) => options.logLevel === "debug" ? console.log(...args) : void 0,
147
+ warn: (...args) => options.logLevel !== "silent" ? console.warn(...args) : void 0
148
+ };
149
+ function isExcluded(route) {
150
+ return (options.exclude || []).some((p) => {
151
+ if (p instanceof RegExp) return p.test(route);
152
+ return route.includes(p);
153
+ });
154
+ }
155
+ function normalizeRoute(input, { stripQuery = true } = {}) {
156
+ if (!input) return null;
157
+ if (input.startsWith("mailto:") || input.startsWith("tel:") || input.startsWith("javascript:"))
158
+ return null;
159
+ let s = input.trim();
160
+ if (s.startsWith("http://") || s.startsWith("https://")) return null;
161
+ const hashIdx = s.indexOf("#");
162
+ if (hashIdx >= 0) s = s.slice(0, hashIdx);
163
+ if (stripQuery) {
164
+ const qIdx = s.indexOf("?");
165
+ if (qIdx >= 0) s = s.slice(0, qIdx);
166
+ }
167
+ if (!s) return null;
168
+ if (!s.startsWith("/")) {
169
+ if (s.startsWith("./"))
170
+ s = s.slice(1);
171
+ else s = "/" + s;
172
+ }
173
+ s = s.replace(/\/{2,}/g, "/");
174
+ return s;
175
+ }
176
+ function routeToMdWebPath(route) {
177
+ if (route === "/") return "index.html.md";
178
+ if (route.endsWith("/")) return route.slice(1) + "index.html.md";
179
+ return route.slice(1) + ".md";
180
+ }
181
+ function routeToMdFsPath(distDir, route) {
182
+ const rel = routeToMdWebPath(route);
183
+ if (options.output.mode === "subdir") {
184
+ return import_node_path.default.join(distDir, options.output.subdir, rel);
185
+ }
186
+ return import_node_path.default.join(distDir, rel);
187
+ }
188
+ function makeLlmsLink(relMdPath) {
189
+ return relMdPath.replace(/\\/g, "/");
190
+ }
191
+ async function safeCloseHttpServer(server) {
192
+ await new Promise((resolve, reject) => {
193
+ server.close((err) => err ? reject(err) : resolve());
194
+ });
195
+ }
196
+ return {
197
+ name: "vite-plugin-llm-spider",
198
+ apply: "build",
199
+ configResolved(rc) {
200
+ resolvedConfig = rc;
201
+ },
202
+ async closeBundle() {
203
+ var _a, _b, _c, _d, _e, _f;
204
+ if (!options.enabled) return;
205
+ if (!resolvedConfig)
206
+ throw new Error("LLM Spider: missing resolved Vite config");
207
+ const distDir = resolvedConfig.build.outDir || "dist";
208
+ const basePath = (resolvedConfig.base || "/").replace(/\\/g, "/");
209
+ let routeDefs = [];
210
+ if (Array.isArray(options.routes) && options.routes.length) {
211
+ routeDefs = options.routes.map((r) => ({
212
+ path: normalizeRoute(r.path, { stripQuery: true }) || "/",
213
+ title: r.title,
214
+ section: r.section || "Pages",
215
+ optional: !!r.optional,
216
+ notes: r.notes
217
+ }));
218
+ } else if ((_a = options.crawl) == null ? void 0 : _a.enabled) {
219
+ routeDefs = [];
220
+ } else {
221
+ routeDefs = [{ path: "/", section: "Pages" }];
222
+ }
223
+ log.info("\nLLM Spider: generating markdown + llms.txt");
224
+ log.debug("distDir:", distDir, "base:", basePath);
225
+ const previewServer = await (0, import_vite.preview)({
226
+ root: resolvedConfig.root,
227
+ base: resolvedConfig.base,
228
+ build: { outDir: distDir },
229
+ preview: { port: 0, open: false, host: "127.0.0.1" },
230
+ configFile: false,
231
+ plugins: [],
232
+ // avoid loading user plugins again
233
+ logLevel: "silent"
234
+ });
235
+ await new Promise((resolve, reject) => {
236
+ const server = previewServer.httpServer;
237
+ if (server.listening) {
238
+ resolve();
239
+ } else {
240
+ server.once("listening", resolve);
241
+ server.once("error", reject);
242
+ setTimeout(() => reject(new Error("Preview server failed to start")), 5e3);
243
+ }
244
+ });
245
+ const addr = previewServer.httpServer.address();
246
+ if (!addr || typeof addr === "string") {
247
+ await safeCloseHttpServer(previewServer.httpServer);
248
+ throw new Error("LLM Spider: could not determine preview server port");
249
+ }
250
+ const normalizedBase = basePath.endsWith("/") ? basePath : basePath + "/";
251
+ const baseUrl = `http://127.0.0.1:${addr.port}${normalizedBase}`;
252
+ log.debug("Preview server at:", baseUrl);
253
+ const browser = await import_puppeteer.default.launch(options.render.launchOptions);
254
+ const turndown = new import_turndown.default(options.markdown.turndown);
255
+ turndown.use(import_turndown_plugin_gfm.gfm);
256
+ const visited = /* @__PURE__ */ new Set();
257
+ const captured = [];
258
+ const queue = [];
259
+ if ((_b = options.crawl) == null ? void 0 : _b.enabled) {
260
+ for (const seed of options.crawl.seeds || ["/"]) {
261
+ const nr = normalizeRoute(seed, {
262
+ stripQuery: options.crawl.stripQuery
263
+ });
264
+ if (nr) queue.push({ route: nr, depth: 0 });
265
+ }
266
+ } else {
267
+ for (const rd of routeDefs) queue.push({ route: rd.path, depth: 0 });
268
+ }
269
+ const maxDepth = ((_c = options.crawl) == null ? void 0 : _c.enabled) ? options.crawl.maxDepth : 0;
270
+ const maxPages = ((_d = options.crawl) == null ? void 0 : _d.enabled) ? options.crawl.maxPages : queue.length;
271
+ const concurrency = ((_e = options.crawl) == null ? void 0 : _e.enabled) ? options.crawl.concurrency : 3;
272
+ async function captureOne(route) {
273
+ var _a2, _b2, _c2;
274
+ if (visited.has(route)) return;
275
+ if (isExcluded(route)) return;
276
+ if (captured.length >= maxPages) return;
277
+ visited.add(route);
278
+ const page = await browser.newPage();
279
+ if ((_a2 = options.render.blockRequests) == null ? void 0 : _a2.length) {
280
+ await page.setRequestInterception(true);
281
+ page.on("request", (req) => {
282
+ const url = req.url();
283
+ const blocked = options.render.blockRequests.some(
284
+ (p) => p instanceof RegExp ? p.test(url) : url.includes(p)
285
+ );
286
+ if (blocked) req.abort();
287
+ else req.continue();
288
+ });
289
+ }
290
+ try {
291
+ const pageUrl = route === "/" ? baseUrl : baseUrl + route.replace(/^\//, "");
292
+ await options.render.beforeGoto(page, { route });
293
+ await page.goto(pageUrl, {
294
+ waitUntil: options.render.waitUntil,
295
+ timeout: options.render.timeoutMs
296
+ });
297
+ if (options.render.waitForSelector) {
298
+ await page.waitForSelector(options.render.waitForSelector, {
299
+ timeout: options.render.timeoutMs
300
+ });
301
+ }
302
+ if (options.render.postLoadDelayMs > 0) {
303
+ await new Promise(
304
+ (r) => setTimeout(r, options.render.postLoadDelayMs)
305
+ );
306
+ }
307
+ await options.render.beforeExtract(page, { route });
308
+ const html = await page.content();
309
+ const $ = cheerio.load(html);
310
+ let harvestedHrefs = [];
311
+ if ((_b2 = options.crawl) == null ? void 0 : _b2.enabled) {
312
+ harvestedHrefs = $("a[href]").map((_, a) => $(a).attr("href")).get();
313
+ log.debug(` Found ${harvestedHrefs.length} links on ${route}:`, harvestedHrefs.slice(0, 15));
314
+ }
315
+ for (const sel of options.extract.removeSelectors || [])
316
+ $(sel).remove();
317
+ const mainSelectors = Array.isArray(options.extract.mainSelector) ? options.extract.mainSelector : [options.extract.mainSelector];
318
+ let mainHtml = null;
319
+ for (const sel of mainSelectors) {
320
+ if (!sel) continue;
321
+ const node = $(sel).first();
322
+ if (node && node.length) {
323
+ mainHtml = node.html();
324
+ break;
325
+ }
326
+ }
327
+ if (!mainHtml) {
328
+ const main = $("main").first();
329
+ mainHtml = main.length ? main.html() : $("body").html();
330
+ }
331
+ const title = ($("title").text() || "").trim() || route;
332
+ const markdownBody = turndown.turndown(mainHtml || "");
333
+ const mdRelPath = options.output.mode === "subdir" ? import_node_path.default.posix.join(options.output.subdir, routeToMdWebPath(route)) : routeToMdWebPath(route);
334
+ const fsPath = routeToMdFsPath(distDir, route);
335
+ await import_promises.default.mkdir(import_node_path.default.dirname(fsPath), { recursive: true });
336
+ const frontmatter = options.markdown.addFrontmatter ? `---
337
+ source: ${route}
338
+ title: ${title}
339
+ generated_at: ${(/* @__PURE__ */ new Date()).toISOString()}
340
+ ---
341
+
342
+ ` : "";
343
+ await import_promises.default.writeFile(fsPath, frontmatter + markdownBody, "utf8");
344
+ const meta = routeDefs.find((r) => r.path === route);
345
+ captured.push({
346
+ route,
347
+ title: (meta == null ? void 0 : meta.title) || title,
348
+ section: (meta == null ? void 0 : meta.section) || "Pages",
349
+ optional: !!(meta == null ? void 0 : meta.optional),
350
+ notes: meta == null ? void 0 : meta.notes,
351
+ mdRelPath
352
+ });
353
+ log.info(` \u2705 ${route} -> ${mdRelPath}`);
354
+ if ((_c2 = options.crawl) == null ? void 0 : _c2.enabled) {
355
+ for (const href of harvestedHrefs) {
356
+ const n = normalizeRoute(href, {
357
+ stripQuery: options.crawl.stripQuery
358
+ });
359
+ if (!n) continue;
360
+ let baseRelative = n;
361
+ if (normalizedBase !== "/" && baseRelative.startsWith(normalizedBase)) {
362
+ baseRelative = "/" + baseRelative.slice(normalizedBase.length);
363
+ baseRelative = baseRelative === "//" ? "/" : baseRelative.replace(/\/{2,}/g, "/");
364
+ }
365
+ if (!visited.has(baseRelative) && !isExcluded(baseRelative)) {
366
+ queue.push({ route: baseRelative, depth: -1 });
367
+ }
368
+ }
369
+ }
370
+ } catch (err) {
371
+ log.warn(` \u26A0\uFE0F failed ${route}: ${(err == null ? void 0 : err.message) || err}`);
372
+ } finally {
373
+ await page.close();
374
+ }
375
+ }
376
+ try {
377
+ while (queue.length && captured.length < maxPages) {
378
+ const batch = queue.splice(0, concurrency).map((item) => {
379
+ const depth = item.depth >= 0 ? item.depth : 1;
380
+ return { route: item.route, depth };
381
+ });
382
+ await Promise.all(
383
+ batch.map(async ({ route, depth }) => {
384
+ var _a2, _b2;
385
+ if (((_a2 = options.crawl) == null ? void 0 : _a2.enabled) && depth > maxDepth) return;
386
+ await captureOne(route);
387
+ if ((_b2 = options.crawl) == null ? void 0 : _b2.enabled) {
388
+ for (let i = 0; i < queue.length; i++) {
389
+ if (queue[i].depth === -1) queue[i].depth = depth + 1;
390
+ }
391
+ }
392
+ })
393
+ );
394
+ }
395
+ const llmsTitle = options.output.llmsTitle || ((_f = resolvedConfig == null ? void 0 : resolvedConfig.env) == null ? void 0 : _f.mode) || "Site";
396
+ const items = options.output.sort ? [...captured].sort((a, b) => a.route.localeCompare(b.route)) : captured;
397
+ const bySection = /* @__PURE__ */ new Map();
398
+ const optionalItems = [];
399
+ for (const item of items) {
400
+ if (item.optional) optionalItems.push(item);
401
+ else {
402
+ const s = item.section || "Pages";
403
+ bySection.set(s, [...bySection.get(s) || [], item]);
404
+ }
405
+ }
406
+ let llms = `# ${llmsTitle}
407
+
408
+ > ${options.output.llmsSummary}
409
+
410
+ `;
411
+ for (const [section, sectionItems] of bySection.entries()) {
412
+ llms += `## ${section}
413
+
414
+ `;
415
+ for (const it of sectionItems) {
416
+ const link = makeLlmsLink(it.mdRelPath);
417
+ const label = it.title || it.route;
418
+ const notes = it.notes ? `: ${it.notes}` : "";
419
+ llms += `- [${label}](${link})${notes}
420
+ `;
421
+ }
422
+ llms += `
423
+ `;
424
+ }
425
+ if (optionalItems.length) {
426
+ llms += `## Optional
427
+
428
+ `;
429
+ for (const it of optionalItems) {
430
+ const link = makeLlmsLink(it.mdRelPath);
431
+ const label = it.title || it.route;
432
+ const notes = it.notes ? `: ${it.notes}` : "";
433
+ llms += `- [${label}](${link})${notes}
434
+ `;
435
+ }
436
+ llms += `
437
+ `;
438
+ }
439
+ const llmsPath = import_node_path.default.join(distDir, options.output.llmsTxtFileName);
440
+ await import_promises.default.writeFile(llmsPath, llms, "utf8");
441
+ log.info(
442
+ `
443
+ LLM Spider: wrote ${captured.length} markdown pages + ${options.output.llmsTxtFileName}
444
+ `
445
+ );
446
+ } finally {
447
+ await browser.close();
448
+ await safeCloseHttpServer(previewServer.httpServer);
449
+ }
450
+ }
451
+ };
452
+ }
453
+ // Annotate the CommonJS export names for ESM import in node:
454
+ 0 && (module.exports = {
455
+ llmSpiderPlugin
456
+ });
457
+ //# sourceMappingURL=index.cjs.map