@happyalienai/vite-plugin-llm-spider 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.js"],"sourcesContent":["import { preview } from \"vite\";\nimport fs from \"node:fs/promises\";\nimport path from \"node:path\";\nimport * as cheerio from \"cheerio\";\nimport TurndownService from \"turndown\";\nimport { gfm } from \"turndown-plugin-gfm\";\nimport puppeteer from \"puppeteer\";\n\n/**\n * @typedef {{ path: string, title?: string, section?: string, optional?: boolean, notes?: string }} RouteDef\n */\n\n/**\n * Vite Plugin: LLM Spider\n * - Generates Markdown snapshots + dist/llms.txt\n * - Spec-aligned default output: \".md appended\" / \"index.html.md\" for directory URLs.\n */\nexport default function llmSpiderPlugin(userOptions = {}) {\n /** @type {import('vite').ResolvedConfig | undefined} */\n let resolvedConfig;\n\n // Deep merge helper\n function deepMerge(target, source) {\n const result = { ...target };\n for (const key of Object.keys(source)) {\n if (source[key] && typeof source[key] === 'object' && !Array.isArray(source[key]) && !(source[key] instanceof RegExp)) {\n result[key] = deepMerge(target[key] || {}, source[key]);\n } else {\n result[key] = source[key];\n }\n }\n return result;\n }\n\n const defaults = {\n enabled: true,\n\n // Recommended: explicit list\n routes: /** @type {RouteDef[] | undefined} */ (undefined),\n\n // Optional crawl mode (off by default)\n crawl: {\n enabled: false,\n seeds: [\"/\"],\n maxDepth: 2,\n maxPages: 50,\n concurrency: 3,\n stripQuery: true,\n },\n\n exclude: [\"/login\", \"/admin\", \"/account\"],\n\n render: {\n waitUntil: \"networkidle2\", // more forgiving than networkidle0 for SPAs\n timeoutMs: 30_000,\n waitForSelector: null, // e.g. 
\"main\" or \"#app main\"\n postLoadDelayMs: 0,\n blockRequests: [\n /google-analytics\\.com/i,\n /googletagmanager\\.com/i,\n /segment\\.com/i,\n /hotjar\\.com/i,\n ],\n launchOptions: {\n headless: \"new\",\n // For CI containers you may need:\n // args: [\"--no-sandbox\", \"--disable-setuid-sandbox\"],\n },\n /**\n * @param {import('puppeteer').Page} _page\n * @param {{ route: string }} _ctx\n */\n beforeGoto: async (_page, _ctx) => {},\n /**\n * @param {import('puppeteer').Page} _page\n * @param {{ route: string }} _ctx\n */\n beforeExtract: async (_page, _ctx) => {},\n },\n\n extract: {\n mainSelector: [\"main\", \"#main-content\", \"[data-main]\"],\n removeSelectors: [\n \"script\",\n \"style\",\n \"noscript\",\n \"nav\",\n \"header\",\n \"footer\",\n \"svg\",\n \"iframe\",\n \"[role='alert']\",\n \".cookie\",\n \".cookie-banner\",\n \".modal\",\n ],\n },\n\n markdown: {\n addFrontmatter: true,\n turndown: {\n headingStyle: \"atx\",\n codeBlockStyle: \"fenced\",\n emDelimiter: \"_\",\n },\n },\n\n output: {\n // \"sibling\" => /pricing -> pricing.md ; /docs/ -> docs/index.html.md ; / -> index.html.md\n mode: \"sibling\",\n subdir: \"ai\", // used only when mode === \"subdir\"\n llmsTxtFileName: \"llms.txt\",\n llmsTitle: null, // defaults to package name or project dir\n llmsSummary:\n \"LLM-friendly index of important pages and their Markdown equivalents.\",\n sort: true,\n },\n\n logLevel: \"info\", // \"silent\" | \"info\" | \"debug\"\n };\n\n const options = deepMerge(defaults, userOptions);\n\n const log = {\n info: (...args) =>\n options.logLevel === \"info\" || options.logLevel === \"debug\"\n ? console.log(...args)\n : undefined,\n debug: (...args) =>\n options.logLevel === \"debug\" ? console.log(...args) : undefined,\n warn: (...args) =>\n options.logLevel !== \"silent\" ? 
console.warn(...args) : undefined,\n };\n\n function isExcluded(route) {\n return (options.exclude || []).some((p) => {\n if (p instanceof RegExp) return p.test(route);\n return route.includes(p);\n });\n }\n\n function normalizeRoute(input, { stripQuery = true } = {}) {\n if (!input) return null;\n\n // Ignore non-page links\n if (\n input.startsWith(\"mailto:\") ||\n input.startsWith(\"tel:\") ||\n input.startsWith(\"javascript:\")\n )\n return null;\n\n // Convert relative -> absolute-ish (we only keep paths)\n // If input is like \"./about\" or \"about\", normalize to \"/about\"\n let s = input.trim();\n\n // Remove protocol absolute links\n if (s.startsWith(\"http://\") || s.startsWith(\"https://\")) return null;\n\n // Drop hash/query\n const hashIdx = s.indexOf(\"#\");\n if (hashIdx >= 0) s = s.slice(0, hashIdx);\n\n if (stripQuery) {\n const qIdx = s.indexOf(\"?\");\n if (qIdx >= 0) s = s.slice(0, qIdx);\n }\n\n // Ignore empty after stripping\n if (!s) return null;\n\n // Normalize relative paths\n if (!s.startsWith(\"/\")) {\n if (s.startsWith(\"./\"))\n s = s.slice(1); // \"./x\" -> \"/x\"\n else s = \"/\" + s;\n }\n\n // Collapse multiple slashes\n s = s.replace(/\\/{2,}/g, \"/\");\n\n return s;\n }\n\n function routeToMdWebPath(route) {\n // route is base-relative and starts with \"/\"\n if (route === \"/\") return \"index.html.md\";\n if (route.endsWith(\"/\")) return route.slice(1) + \"index.html.md\"; // \"docs/\" -> \"docs/index.html.md\"\n return route.slice(1) + \".md\"; // \"pricing\" -> \"pricing.md\"\n }\n\n function routeToMdFsPath(distDir, route) {\n const rel = routeToMdWebPath(route); // already relative\n if (options.output.mode === \"subdir\") {\n return path.join(distDir, options.output.subdir, rel);\n }\n return path.join(distDir, rel);\n }\n\n function makeLlmsLink(relMdPath) {\n // Use relative links (no leading slash) so it works in subpath deployments.\n // If subdir mode: links should include \"ai/...\"\n return 
relMdPath.replace(/\\\\/g, \"/\");\n }\n\n async function safeCloseHttpServer(server) {\n await new Promise((resolve, reject) => {\n server.close((err) => (err ? reject(err) : resolve()));\n });\n }\n\n return {\n name: \"vite-plugin-llm-spider\",\n apply: \"build\",\n\n configResolved(rc) {\n resolvedConfig = rc;\n },\n\n async closeBundle() {\n if (!options.enabled) return;\n if (!resolvedConfig)\n throw new Error(\"LLM Spider: missing resolved Vite config\");\n\n const distDir = resolvedConfig.build.outDir || \"dist\";\n const basePath = (resolvedConfig.base || \"/\").replace(/\\\\/g, \"/\");\n\n // ---- Resolve route list ----\n /** @type {RouteDef[]} */\n let routeDefs = [];\n\n if (Array.isArray(options.routes) && options.routes.length) {\n routeDefs = options.routes.map((r) => ({\n path: normalizeRoute(r.path, { stripQuery: true }) || \"/\",\n title: r.title,\n section: r.section || \"Pages\",\n optional: !!r.optional,\n notes: r.notes,\n }));\n } else if (options.crawl?.enabled) {\n // Crawl mode: route defs will be created as discovered.\n routeDefs = [];\n } else {\n // Default minimal route\n routeDefs = [{ path: \"/\", section: \"Pages\" }];\n }\n\n log.info(\"\\nLLM Spider: generating markdown + llms.txt\");\n log.debug(\"distDir:\", distDir, \"base:\", basePath);\n\n // ---- Start preview server for built output ----\n // Vite preview API returns a PreviewServer with httpServer + resolvedUrls.\n const previewServer = await preview({\n root: resolvedConfig.root,\n base: resolvedConfig.base,\n build: { outDir: distDir },\n preview: { port: 0, open: false, host: '127.0.0.1' },\n configFile: false,\n plugins: [], // avoid loading user plugins again\n logLevel: \"silent\",\n });\n\n // Wait for server to be fully listening\n await new Promise((resolve, reject) => {\n const server = previewServer.httpServer;\n if (server.listening) {\n resolve();\n } else {\n server.once('listening', resolve);\n server.once('error', reject);\n // Timeout after 5s\n 
setTimeout(() => reject(new Error('Preview server failed to start')), 5000);\n }\n });\n\n const addr = previewServer.httpServer.address();\n if (!addr || typeof addr === \"string\") {\n await safeCloseHttpServer(previewServer.httpServer);\n throw new Error(\"LLM Spider: could not determine preview server port\");\n }\n\n // Build a base URL that respects Vite's base path\n // Example: http://127.0.0.1:4173/app/ (if base=\"/app/\")\n const normalizedBase = basePath.endsWith(\"/\") ? basePath : basePath + \"/\";\n const baseUrl = `http://127.0.0.1:${addr.port}${normalizedBase}`;\n \n log.debug(\"Preview server at:\", baseUrl);\n\n const browser = await puppeteer.launch(options.render.launchOptions);\n const turndown = new TurndownService(options.markdown.turndown);\n turndown.use(gfm);\n\n /** @type {Set<string>} */\n const visited = new Set();\n\n /** @type {{ route: string, title?: string, section: string, optional: boolean, notes?: string, mdRelPath: string }[]} */\n const captured = [];\n\n // Crawl queue stores base-relative routes (no base prefix)\n /** @type {{ route: string, depth: number }[]} */\n const queue = [];\n\n // Seed queue\n if (options.crawl?.enabled) {\n for (const seed of options.crawl.seeds || [\"/\"]) {\n const nr = normalizeRoute(seed, {\n stripQuery: options.crawl.stripQuery,\n });\n if (nr) queue.push({ route: nr, depth: 0 });\n }\n } else {\n for (const rd of routeDefs) queue.push({ route: rd.path, depth: 0 });\n }\n\n const maxDepth = options.crawl?.enabled ? options.crawl.maxDepth : 0;\n const maxPages = options.crawl?.enabled\n ? options.crawl.maxPages\n : queue.length;\n const concurrency = options.crawl?.enabled\n ? 
options.crawl.concurrency\n : 3;\n\n async function captureOne(route) {\n if (visited.has(route)) return;\n if (isExcluded(route)) return;\n if (captured.length >= maxPages) return;\n\n visited.add(route);\n\n const page = await browser.newPage();\n\n // Request blocking (best effort)\n if (options.render.blockRequests?.length) {\n await page.setRequestInterception(true);\n page.on(\"request\", (req) => {\n const url = req.url();\n const blocked = options.render.blockRequests.some((p) =>\n p instanceof RegExp ? p.test(url) : url.includes(p),\n );\n if (blocked) req.abort();\n else req.continue();\n });\n }\n\n try {\n const pageUrl =\n route === \"/\" ? baseUrl : baseUrl + route.replace(/^\\//, \"\");\n await options.render.beforeGoto(page, { route });\n\n await page.goto(pageUrl, {\n waitUntil: options.render.waitUntil,\n timeout: options.render.timeoutMs,\n });\n\n if (options.render.waitForSelector) {\n await page.waitForSelector(options.render.waitForSelector, {\n timeout: options.render.timeoutMs,\n });\n }\n\n if (options.render.postLoadDelayMs > 0) {\n await new Promise((r) =>\n setTimeout(r, options.render.postLoadDelayMs),\n );\n }\n\n await options.render.beforeExtract(page, { route });\n\n const html = await page.content();\n const $ = cheerio.load(html);\n\n // Harvest links BEFORE removing nav elements (for crawl mode)\n let harvestedHrefs = [];\n if (options.crawl?.enabled) {\n harvestedHrefs = $(\"a[href]\")\n .map((_, a) => $(a).attr(\"href\"))\n .get();\n log.debug(` Found ${harvestedHrefs.length} links on ${route}:`, harvestedHrefs.slice(0, 15));\n }\n\n // Remove noisy elements (CSS selectors)\n for (const sel of options.extract.removeSelectors || [])\n $(sel).remove();\n\n // Pick main content\n const mainSelectors = Array.isArray(options.extract.mainSelector)\n ? 
options.extract.mainSelector\n : [options.extract.mainSelector];\n\n let mainHtml = null;\n for (const sel of mainSelectors) {\n if (!sel) continue;\n const node = $(sel).first();\n if (node && node.length) {\n mainHtml = node.html();\n break;\n }\n }\n if (!mainHtml) {\n const main = $(\"main\").first();\n mainHtml = main.length ? main.html() : $(\"body\").html();\n }\n\n const title = ($(\"title\").text() || \"\").trim() || route;\n\n // Convert to Markdown\n const markdownBody = turndown.turndown(mainHtml || \"\");\n\n // Write file\n const mdRelPath =\n options.output.mode === \"subdir\"\n ? path.posix.join(options.output.subdir, routeToMdWebPath(route))\n : routeToMdWebPath(route);\n\n const fsPath = routeToMdFsPath(distDir, route);\n await fs.mkdir(path.dirname(fsPath), { recursive: true });\n\n const frontmatter = options.markdown.addFrontmatter\n ? `---\\nsource: ${route}\\ntitle: ${title}\\ngenerated_at: ${new Date().toISOString()}\\n---\\n\\n`\n : \"\";\n\n await fs.writeFile(fsPath, frontmatter + markdownBody, \"utf8\");\n\n // Map metadata\n const meta = routeDefs.find((r) => r.path === route);\n captured.push({\n route,\n title: meta?.title || title,\n section: meta?.section || \"Pages\",\n optional: !!meta?.optional,\n notes: meta?.notes,\n mdRelPath,\n });\n\n log.info(` ✅ ${route} -> ${mdRelPath}`);\n\n // Harvest links (crawl mode only) - using pre-harvested links from before cleanup\n if (options.crawl?.enabled) {\n for (const href of harvestedHrefs) {\n const n = normalizeRoute(href, {\n stripQuery: options.crawl.stripQuery,\n });\n if (!n) continue;\n\n // If site is deployed under a base like \"/app/\", router-links usually include \"/app/...\"\n // Strip base prefix when present so our internal route stays base-relative.\n let baseRelative = n;\n if (\n normalizedBase !== \"/\" &&\n baseRelative.startsWith(normalizedBase)\n ) {\n baseRelative = \"/\" + baseRelative.slice(normalizedBase.length);\n baseRelative =\n baseRelative === \"//\"\n ? 
\"/\"\n : baseRelative.replace(/\\/{2,}/g, \"/\");\n }\n\n if (!visited.has(baseRelative) && !isExcluded(baseRelative)) {\n // Depth tracking is handled by the outer loop (we store depth in queue entries)\n // so just push; caller will attach depth.\n queue.push({ route: baseRelative, depth: -1 }); // placeholder depth; will be overwritten\n }\n }\n }\n } catch (err) {\n log.warn(` ⚠️ failed ${route}: ${err?.message || err}`);\n } finally {\n await page.close();\n }\n }\n\n try {\n // BFS: process queue in batches\n while (queue.length && captured.length < maxPages) {\n // Fix up crawl depths if needed\n // If we're in crawl mode, queue items may have depth=-1 from harvested links.\n // We'll conservatively treat them as depth=1 unless they were explicitly set.\n const batch = queue.splice(0, concurrency).map((item) => {\n const depth = item.depth >= 0 ? item.depth : 1;\n return { route: item.route, depth };\n });\n\n await Promise.all(\n batch.map(async ({ route, depth }) => {\n if (options.crawl?.enabled && depth > maxDepth) return;\n await captureOne(route);\n\n // If crawl mode, increase depth for any newly harvested links\n if (options.crawl?.enabled) {\n // Patch any depth=-1 entries added during captureOne\n for (let i = 0; i < queue.length; i++) {\n if (queue[i].depth === -1) queue[i].depth = depth + 1;\n }\n }\n }),\n );\n }\n\n // ---- Generate llms.txt ----\n const llmsTitle =\n options.output.llmsTitle || resolvedConfig?.env?.mode || \"Site\";\n\n // Deterministic ordering\n const items = options.output.sort\n ? 
[...captured].sort((a, b) => a.route.localeCompare(b.route))\n : captured;\n\n // Group by section, with Optional special handling\n /** @type {Map<string, typeof items>} */\n const bySection = new Map();\n /** @type {typeof items} */\n const optionalItems = [];\n\n for (const item of items) {\n if (item.optional) optionalItems.push(item);\n else {\n const s = item.section || \"Pages\";\n bySection.set(s, [...(bySection.get(s) || []), item]);\n }\n }\n\n let llms = `# ${llmsTitle}\\n\\n> ${options.output.llmsSummary}\\n\\n`;\n\n for (const [section, sectionItems] of bySection.entries()) {\n llms += `## ${section}\\n\\n`;\n for (const it of sectionItems) {\n const link = makeLlmsLink(it.mdRelPath);\n const label = it.title || it.route;\n const notes = it.notes ? `: ${it.notes}` : \"\";\n llms += `- [${label}](${link})${notes}\\n`;\n }\n llms += `\\n`;\n }\n\n if (optionalItems.length) {\n llms += `## Optional\\n\\n`;\n for (const it of optionalItems) {\n const link = makeLlmsLink(it.mdRelPath);\n const label = it.title || it.route;\n const notes = it.notes ? 
`: ${it.notes}` : \"\";\n llms += `- [${label}](${link})${notes}\\n`;\n }\n llms += `\\n`;\n }\n\n const llmsPath = path.join(distDir, options.output.llmsTxtFileName);\n await fs.writeFile(llmsPath, llms, \"utf8\");\n\n log.info(\n `\\nLLM Spider: wrote ${captured.length} markdown pages + ${options.output.llmsTxtFileName}\\n`,\n );\n } finally {\n await browser.close();\n await safeCloseHttpServer(previewServer.httpServer);\n }\n },\n };\n}\n\n// Named export for CJS compatibility\nexport { llmSpiderPlugin };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAAwB;AACxB,sBAAe;AACf,uBAAiB;AACjB,cAAyB;AACzB,sBAA4B;AAC5B,iCAAoB;AACpB,uBAAsB;AAWP,SAAR,gBAAiC,cAAc,CAAC,GAAG;AAExD,MAAI;AAGJ,WAAS,UAAU,QAAQ,QAAQ;AACjC,UAAM,SAAS,EAAE,GAAG,OAAO;AAC3B,eAAW,OAAO,OAAO,KAAK,MAAM,GAAG;AACrC,UAAI,OAAO,GAAG,KAAK,OAAO,OAAO,GAAG,MAAM,YAAY,CAAC,MAAM,QAAQ,OAAO,GAAG,CAAC,KAAK,EAAE,OAAO,GAAG,aAAa,SAAS;AACrH,eAAO,GAAG,IAAI,UAAU,OAAO,GAAG,KAAK,CAAC,GAAG,OAAO,GAAG,CAAC;AAAA,MACxD,OAAO;AACL,eAAO,GAAG,IAAI,OAAO,GAAG;AAAA,MAC1B;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,QAAM,WAAW;AAAA,IACf,SAAS;AAAA;AAAA,IAGT;AAAA;AAAA,MAA+C;AAAA;AAAA;AAAA,IAG/C,OAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,CAAC,GAAG;AAAA,MACX,UAAU;AAAA,MACV,UAAU;AAAA,MACV,aAAa;AAAA,MACb,YAAY;AAAA,IACd;AAAA,IAEA,SAAS,CAAC,UAAU,UAAU,UAAU;AAAA,IAExC,QAAQ;AAAA,MACN,WAAW;AAAA;AAAA,MACX,WAAW;AAAA,MACX,iBAAiB;AAAA;AAAA,MACjB,iBAAiB;AAAA,MACjB,eAAe;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,MACA,eAAe;AAAA,QACb,UAAU;AAAA;AAAA;AAAA,MAGZ;AAAA;AAAA;AAAA;AAAA;AAAA,MAKA,YAAY,OAAO,OAAO,SAAS;AAAA,MAAC;AAAA;AAAA;AAAA;AAAA;AAAA,MAKpC,eAAe,OAAO,OAAO,SAAS;AAAA,MAAC;AAAA,IACzC;AAAA,IAEA,SAAS;AAAA,MACP,cAAc,CAAC,QAAQ,iBAAiB,aAAa;AAAA,MACrD,iBAAiB;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,IAEA,UAAU;AAAA,MACR,gBAAgB;AAAA,MAChB,UAAU;AAAA,QACR,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,aAAa;AAAA,MACf;AAAA,IACF;AAAA,IAEA,QAAQ;AAAA;AAAA,MA
EN,MAAM;AAAA,MACN,QAAQ;AAAA;AAAA,MACR,iBAAiB;AAAA,MACjB,WAAW;AAAA;AAAA,MACX,aACE;AAAA,MACF,MAAM;AAAA,IACR;AAAA,IAEA,UAAU;AAAA;AAAA,EACZ;AAEA,QAAM,UAAU,UAAU,UAAU,WAAW;AAE/C,QAAM,MAAM;AAAA,IACV,MAAM,IAAI,SACR,QAAQ,aAAa,UAAU,QAAQ,aAAa,UAChD,QAAQ,IAAI,GAAG,IAAI,IACnB;AAAA,IACN,OAAO,IAAI,SACT,QAAQ,aAAa,UAAU,QAAQ,IAAI,GAAG,IAAI,IAAI;AAAA,IACxD,MAAM,IAAI,SACR,QAAQ,aAAa,WAAW,QAAQ,KAAK,GAAG,IAAI,IAAI;AAAA,EAC5D;AAEA,WAAS,WAAW,OAAO;AACzB,YAAQ,QAAQ,WAAW,CAAC,GAAG,KAAK,CAAC,MAAM;AACzC,UAAI,aAAa,OAAQ,QAAO,EAAE,KAAK,KAAK;AAC5C,aAAO,MAAM,SAAS,CAAC;AAAA,IACzB,CAAC;AAAA,EACH;AAEA,WAAS,eAAe,OAAO,EAAE,aAAa,KAAK,IAAI,CAAC,GAAG;AACzD,QAAI,CAAC,MAAO,QAAO;AAGnB,QACE,MAAM,WAAW,SAAS,KAC1B,MAAM,WAAW,MAAM,KACvB,MAAM,WAAW,aAAa;AAE9B,aAAO;AAIT,QAAI,IAAI,MAAM,KAAK;AAGnB,QAAI,EAAE,WAAW,SAAS,KAAK,EAAE,WAAW,UAAU,EAAG,QAAO;AAGhE,UAAM,UAAU,EAAE,QAAQ,GAAG;AAC7B,QAAI,WAAW,EAAG,KAAI,EAAE,MAAM,GAAG,OAAO;AAExC,QAAI,YAAY;AACd,YAAM,OAAO,EAAE,QAAQ,GAAG;AAC1B,UAAI,QAAQ,EAAG,KAAI,EAAE,MAAM,GAAG,IAAI;AAAA,IACpC;AAGA,QAAI,CAAC,EAAG,QAAO;AAGf,QAAI,CAAC,EAAE,WAAW,GAAG,GAAG;AACtB,UAAI,EAAE,WAAW,IAAI;AACnB,YAAI,EAAE,MAAM,CAAC;AAAA,UACV,KAAI,MAAM;AAAA,IACjB;AAGA,QAAI,EAAE,QAAQ,WAAW,GAAG;AAE5B,WAAO;AAAA,EACT;AAEA,WAAS,iBAAiB,OAAO;AAE/B,QAAI,UAAU,IAAK,QAAO;AAC1B,QAAI,MAAM,SAAS,GAAG,EAAG,QAAO,MAAM,MAAM,CAAC,IAAI;AACjD,WAAO,MAAM,MAAM,CAAC,IAAI;AAAA,EAC1B;AAEA,WAAS,gBAAgB,SAAS,OAAO;AACvC,UAAM,MAAM,iBAAiB,KAAK;AAClC,QAAI,QAAQ,OAAO,SAAS,UAAU;AACpC,aAAO,iBAAAA,QAAK,KAAK,SAAS,QAAQ,OAAO,QAAQ,GAAG;AAAA,IACtD;AACA,WAAO,iBAAAA,QAAK,KAAK,SAAS,GAAG;AAAA,EAC/B;AAEA,WAAS,aAAa,WAAW;AAG/B,WAAO,UAAU,QAAQ,OAAO,GAAG;AAAA,EACrC;AAEA,iBAAe,oBAAoB,QAAQ;AACzC,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAO,MAAM,CAAC,QAAS,MAAM,OAAO,GAAG,IAAI,QAAQ,CAAE;AAAA,IACvD,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,OAAO;AAAA,IAEP,eAAe,IAAI;AACjB,uBAAiB;AAAA,IACnB;AAAA,IAEA,MAAM,cAAc;AA3NxB;AA4NM,UAAI,CAAC,QAAQ,QAAS;AACtB,UAAI,CAAC;AACH,cAAM,IAAI,MAAM,0CAA0C;AAE5D,YAAM,UAAU,eAAe,MAAM,UAAU;AAC/C,YAAM,YAAY,eAAe,QAAQ,KAAK,QAAQ,OAAO,GAAG;AAIhE,UAAI,YAAY,CAAC;
AAEjB,UAAI,MAAM,QAAQ,QAAQ,MAAM,KAAK,QAAQ,OAAO,QAAQ;AAC1D,oBAAY,QAAQ,OAAO,IAAI,CAAC,OAAO;AAAA,UACrC,MAAM,eAAe,EAAE,MAAM,EAAE,YAAY,KAAK,CAAC,KAAK;AAAA,UACtD,OAAO,EAAE;AAAA,UACT,SAAS,EAAE,WAAW;AAAA,UACtB,UAAU,CAAC,CAAC,EAAE;AAAA,UACd,OAAO,EAAE;AAAA,QACX,EAAE;AAAA,MACJ,YAAW,aAAQ,UAAR,mBAAe,SAAS;AAEjC,oBAAY,CAAC;AAAA,MACf,OAAO;AAEL,oBAAY,CAAC,EAAE,MAAM,KAAK,SAAS,QAAQ,CAAC;AAAA,MAC9C;AAEA,UAAI,KAAK,8CAA8C;AACvD,UAAI,MAAM,YAAY,SAAS,SAAS,QAAQ;AAIhD,YAAM,gBAAgB,UAAM,qBAAQ;AAAA,QAClC,MAAM,eAAe;AAAA,QACrB,MAAM,eAAe;AAAA,QACrB,OAAO,EAAE,QAAQ,QAAQ;AAAA,QACzB,SAAS,EAAE,MAAM,GAAG,MAAM,OAAO,MAAM,YAAY;AAAA,QACnD,YAAY;AAAA,QACZ,SAAS,CAAC;AAAA;AAAA,QACV,UAAU;AAAA,MACZ,CAAC;AAGD,YAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,cAAM,SAAS,cAAc;AAC7B,YAAI,OAAO,WAAW;AACpB,kBAAQ;AAAA,QACV,OAAO;AACL,iBAAO,KAAK,aAAa,OAAO;AAChC,iBAAO,KAAK,SAAS,MAAM;AAE3B,qBAAW,MAAM,OAAO,IAAI,MAAM,gCAAgC,CAAC,GAAG,GAAI;AAAA,QAC5E;AAAA,MACF,CAAC;AAED,YAAM,OAAO,cAAc,WAAW,QAAQ;AAC9C,UAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACrC,cAAM,oBAAoB,cAAc,UAAU;AAClD,cAAM,IAAI,MAAM,qDAAqD;AAAA,MACvE;AAIA,YAAM,iBAAiB,SAAS,SAAS,GAAG,IAAI,WAAW,WAAW;AACtE,YAAM,UAAU,oBAAoB,KAAK,IAAI,GAAG,cAAc;AAE9D,UAAI,MAAM,sBAAsB,OAAO;AAEvC,YAAM,UAAU,MAAM,iBAAAC,QAAU,OAAO,QAAQ,OAAO,aAAa;AACnE,YAAM,WAAW,IAAI,gBAAAC,QAAgB,QAAQ,SAAS,QAAQ;AAC9D,eAAS,IAAI,8BAAG;AAGhB,YAAM,UAAU,oBAAI,IAAI;AAGxB,YAAM,WAAW,CAAC;AAIlB,YAAM,QAAQ,CAAC;AAGf,WAAI,aAAQ,UAAR,mBAAe,SAAS;AAC1B,mBAAW,QAAQ,QAAQ,MAAM,SAAS,CAAC,GAAG,GAAG;AAC/C,gBAAM,KAAK,eAAe,MAAM;AAAA,YAC9B,YAAY,QAAQ,MAAM;AAAA,UAC5B,CAAC;AACD,cAAI,GAAI,OAAM,KAAK,EAAE,OAAO,IAAI,OAAO,EAAE,CAAC;AAAA,QAC5C;AAAA,MACF,OAAO;AACL,mBAAW,MAAM,UAAW,OAAM,KAAK,EAAE,OAAO,GAAG,MAAM,OAAO,EAAE,CAAC;AAAA,MACrE;AAEA,YAAM,aAAW,aAAQ,UAAR,mBAAe,WAAU,QAAQ,MAAM,WAAW;AACnE,YAAM,aAAW,aAAQ,UAAR,mBAAe,WAC5B,QAAQ,MAAM,WACd,MAAM;AACV,YAAM,gBAAc,aAAQ,UAAR,mBAAe,WAC/B,QAAQ,MAAM,cACd;AAEJ,qBAAe,WAAW,OAAO;AAlUvC,YAAAC,KAAAC,KAAAC;AAmUQ,YAAI,QAAQ,IAAI,KAAK,EAAG;AACxB,YAAI,WAAW,KAAK,EAAG;AACvB,YAAI,SAAS,UAAU,SAAU;AAEjC,gBAAQ,IAAI,KAAK;AAEjB,cAAM,OAAO,MAAM,QAAQ,QAAQ;AAGnC,a
AAIF,MAAA,QAAQ,OAAO,kBAAf,gBAAAA,IAA8B,QAAQ;AACxC,gBAAM,KAAK,uBAAuB,IAAI;AACtC,eAAK,GAAG,WAAW,CAAC,QAAQ;AAC1B,kBAAM,MAAM,IAAI,IAAI;AACpB,kBAAM,UAAU,QAAQ,OAAO,cAAc;AAAA,cAAK,CAAC,MACjD,aAAa,SAAS,EAAE,KAAK,GAAG,IAAI,IAAI,SAAS,CAAC;AAAA,YACpD;AACA,gBAAI,QAAS,KAAI,MAAM;AAAA,gBAClB,KAAI,SAAS;AAAA,UACpB,CAAC;AAAA,QACH;AAEA,YAAI;AACF,gBAAM,UACJ,UAAU,MAAM,UAAU,UAAU,MAAM,QAAQ,OAAO,EAAE;AAC7D,gBAAM,QAAQ,OAAO,WAAW,MAAM,EAAE,MAAM,CAAC;AAE/C,gBAAM,KAAK,KAAK,SAAS;AAAA,YACvB,WAAW,QAAQ,OAAO;AAAA,YAC1B,SAAS,QAAQ,OAAO;AAAA,UAC1B,CAAC;AAED,cAAI,QAAQ,OAAO,iBAAiB;AAClC,kBAAM,KAAK,gBAAgB,QAAQ,OAAO,iBAAiB;AAAA,cACzD,SAAS,QAAQ,OAAO;AAAA,YAC1B,CAAC;AAAA,UACH;AAEA,cAAI,QAAQ,OAAO,kBAAkB,GAAG;AACtC,kBAAM,IAAI;AAAA,cAAQ,CAAC,MACjB,WAAW,GAAG,QAAQ,OAAO,eAAe;AAAA,YAC9C;AAAA,UACF;AAEA,gBAAM,QAAQ,OAAO,cAAc,MAAM,EAAE,MAAM,CAAC;AAElD,gBAAM,OAAO,MAAM,KAAK,QAAQ;AAChC,gBAAM,IAAY,aAAK,IAAI;AAG3B,cAAI,iBAAiB,CAAC;AACtB,eAAIC,MAAA,QAAQ,UAAR,gBAAAA,IAAe,SAAS;AAC1B,6BAAiB,EAAE,SAAS,EACzB,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,KAAK,MAAM,CAAC,EAC/B,IAAI;AACP,gBAAI,MAAM,WAAW,eAAe,MAAM,aAAa,KAAK,KAAK,eAAe,MAAM,GAAG,EAAE,CAAC;AAAA,UAC9F;AAGA,qBAAW,OAAO,QAAQ,QAAQ,mBAAmB,CAAC;AACpD,cAAE,GAAG,EAAE,OAAO;AAGhB,gBAAM,gBAAgB,MAAM,QAAQ,QAAQ,QAAQ,YAAY,IAC5D,QAAQ,QAAQ,eAChB,CAAC,QAAQ,QAAQ,YAAY;AAEjC,cAAI,WAAW;AACf,qBAAW,OAAO,eAAe;AAC/B,gBAAI,CAAC,IAAK;AACV,kBAAM,OAAO,EAAE,GAAG,EAAE,MAAM;AAC1B,gBAAI,QAAQ,KAAK,QAAQ;AACvB,yBAAW,KAAK,KAAK;AACrB;AAAA,YACF;AAAA,UACF;AACA,cAAI,CAAC,UAAU;AACb,kBAAM,OAAO,EAAE,MAAM,EAAE,MAAM;AAC7B,uBAAW,KAAK,SAAS,KAAK,KAAK,IAAI,EAAE,MAAM,EAAE,KAAK;AAAA,UACxD;AAEA,gBAAM,SAAS,EAAE,OAAO,EAAE,KAAK,KAAK,IAAI,KAAK,KAAK;AAGlD,gBAAM,eAAe,SAAS,SAAS,YAAY,EAAE;AAGrD,gBAAM,YACJ,QAAQ,OAAO,SAAS,WACpB,iBAAAJ,QAAK,MAAM,KAAK,QAAQ,OAAO,QAAQ,iBAAiB,KAAK,CAAC,IAC9D,iBAAiB,KAAK;AAE5B,gBAAM,SAAS,gBAAgB,SAAS,KAAK;AAC7C,gBAAM,gBAAAM,QAAG,MAAM,iBAAAN,QAAK,QAAQ,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AAExD,gBAAM,cAAc,QAAQ,SAAS,iBACjC;AAAA,UAAgB,KAAK;AAAA,SAAY,KAAK;AAAA,iBAAmB,oBAAI,KAAK,GAAE,YAAY,CAAC;AAAA;AAAA;AAAA,IACjF;AAEJ,gBAAM,gBAAAM
,QAAG,UAAU,QAAQ,cAAc,cAAc,MAAM;AAG7D,gBAAM,OAAO,UAAU,KAAK,CAAC,MAAM,EAAE,SAAS,KAAK;AACnD,mBAAS,KAAK;AAAA,YACZ;AAAA,YACA,QAAO,6BAAM,UAAS;AAAA,YACtB,UAAS,6BAAM,YAAW;AAAA,YAC1B,UAAU,CAAC,EAAC,6BAAM;AAAA,YAClB,OAAO,6BAAM;AAAA,YACb;AAAA,UACF,CAAC;AAED,cAAI,KAAK,YAAO,KAAK,OAAO,SAAS,EAAE;AAGvC,eAAID,MAAA,QAAQ,UAAR,gBAAAA,IAAe,SAAS;AAC1B,uBAAW,QAAQ,gBAAgB;AACjC,oBAAM,IAAI,eAAe,MAAM;AAAA,gBAC7B,YAAY,QAAQ,MAAM;AAAA,cAC5B,CAAC;AACD,kBAAI,CAAC,EAAG;AAIR,kBAAI,eAAe;AACnB,kBACE,mBAAmB,OACnB,aAAa,WAAW,cAAc,GACtC;AACA,+BAAe,MAAM,aAAa,MAAM,eAAe,MAAM;AAC7D,+BACE,iBAAiB,OACb,MACA,aAAa,QAAQ,WAAW,GAAG;AAAA,cAC3C;AAEA,kBAAI,CAAC,QAAQ,IAAI,YAAY,KAAK,CAAC,WAAW,YAAY,GAAG;AAG3D,sBAAM,KAAK,EAAE,OAAO,cAAc,OAAO,GAAG,CAAC;AAAA,cAC/C;AAAA,YACF;AAAA,UACF;AAAA,QACF,SAAS,KAAK;AACZ,cAAI,KAAK,0BAAgB,KAAK,MAAK,2BAAK,YAAW,GAAG,EAAE;AAAA,QAC1D,UAAE;AACA,gBAAM,KAAK,MAAM;AAAA,QACnB;AAAA,MACF;AAEA,UAAI;AAEF,eAAO,MAAM,UAAU,SAAS,SAAS,UAAU;AAIjD,gBAAM,QAAQ,MAAM,OAAO,GAAG,WAAW,EAAE,IAAI,CAAC,SAAS;AACvD,kBAAM,QAAQ,KAAK,SAAS,IAAI,KAAK,QAAQ;AAC7C,mBAAO,EAAE,OAAO,KAAK,OAAO,MAAM;AAAA,UACpC,CAAC;AAED,gBAAM,QAAQ;AAAA,YACZ,MAAM,IAAI,OAAO,EAAE,OAAO,MAAM,MAAM;AApelD,kBAAAF,KAAAC;AAqec,oBAAID,MAAA,QAAQ,UAAR,gBAAAA,IAAe,YAAW,QAAQ,SAAU;AAChD,oBAAM,WAAW,KAAK;AAGtB,mBAAIC,MAAA,QAAQ,UAAR,gBAAAA,IAAe,SAAS;AAE1B,yBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,sBAAI,MAAM,CAAC,EAAE,UAAU,GAAI,OAAM,CAAC,EAAE,QAAQ,QAAQ;AAAA,gBACtD;AAAA,cACF;AAAA,YACF,CAAC;AAAA,UACH;AAAA,QACF;AAGA,cAAM,YACJ,QAAQ,OAAO,eAAa,sDAAgB,QAAhB,mBAAqB,SAAQ;AAG3D,cAAM,QAAQ,QAAQ,OAAO,OACzB,CAAC,GAAG,QAAQ,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,cAAc,EAAE,KAAK,CAAC,IAC3D;AAIJ,cAAM,YAAY,oBAAI,IAAI;AAE1B,cAAM,gBAAgB,CAAC;AAEvB,mBAAW,QAAQ,OAAO;AACxB,cAAI,KAAK,SAAU,eAAc,KAAK,IAAI;AAAA,eACrC;AACH,kBAAM,IAAI,KAAK,WAAW;AAC1B,sBAAU,IAAI,GAAG,CAAC,GAAI,UAAU,IAAI,CAAC,KAAK,CAAC,GAAI,IAAI,CAAC;AAAA,UACtD;AAAA,QACF;AAEA,YAAI,OAAO,KAAK,SAAS;AAAA;AAAA,IAAS,QAAQ,OAAO,WAAW;AAAA;AAAA;AAE5D,mBAAW,CAAC,SAAS,YAAY,KAAK,UAAU,QAAQ,GAAG;AACzD,kBAAQ,MAAM,OAAO;AAAA;AAAA;AACrB,qBAAW,MAAM,cAAc;AAC7B,kBA
AM,OAAO,aAAa,GAAG,SAAS;AACtC,kBAAM,QAAQ,GAAG,SAAS,GAAG;AAC7B,kBAAM,QAAQ,GAAG,QAAQ,KAAK,GAAG,KAAK,KAAK;AAC3C,oBAAQ,MAAM,KAAK,KAAK,IAAI,IAAI,KAAK;AAAA;AAAA,UACvC;AACA,kBAAQ;AAAA;AAAA,QACV;AAEA,YAAI,cAAc,QAAQ;AACxB,kBAAQ;AAAA;AAAA;AACR,qBAAW,MAAM,eAAe;AAC9B,kBAAM,OAAO,aAAa,GAAG,SAAS;AACtC,kBAAM,QAAQ,GAAG,SAAS,GAAG;AAC7B,kBAAM,QAAQ,GAAG,QAAQ,KAAK,GAAG,KAAK,KAAK;AAC3C,oBAAQ,MAAM,KAAK,KAAK,IAAI,IAAI,KAAK;AAAA;AAAA,UACvC;AACA,kBAAQ;AAAA;AAAA,QACV;AAEA,cAAM,WAAW,iBAAAJ,QAAK,KAAK,SAAS,QAAQ,OAAO,eAAe;AAClE,cAAM,gBAAAM,QAAG,UAAU,UAAU,MAAM,MAAM;AAEzC,YAAI;AAAA,UACF;AAAA,oBAAuB,SAAS,MAAM,qBAAqB,QAAQ,OAAO,eAAe;AAAA;AAAA,QAC3F;AAAA,MACF,UAAE;AACA,cAAM,QAAQ,MAAM;AACpB,cAAM,oBAAoB,cAAc,UAAU;AAAA,MACpD;AAAA,IACF;AAAA,EACF;AACF;","names":["path","puppeteer","TurndownService","_a","_b","_c","fs"]}
@@ -0,0 +1,111 @@
1
+ import { Plugin } from 'vite';
2
+ import { LaunchOptions, Page } from 'puppeteer';
3
+
4
+ /** One page to snapshot, plus the metadata used for its llms.txt entry. */ interface RouteDef {
5
+ /** Site-relative URL path (e.g., '/pricing', '/docs/'); hash and query string are stripped, and an unusable value falls back to '/' */
6
+ path: string
7
+ /** Display title in llms.txt; when omitted, the rendered page's <title> text is used, or the route if that is empty */
8
+ title?: string
9
+ /** H2 section grouping in llms.txt (default: 'Pages') */
10
+ section?: string
11
+ /** If true, the page is listed under the "## Optional" section instead of its own section */
12
+ optional?: boolean
13
+ /** Appended after the link in llms.txt as ": <notes>" */
14
+ notes?: string
15
+ }
16
+
17
+ /** Settings for the optional link-following crawl. */ interface CrawlOptions {
18
+ /** Enable BFS crawl mode (default: false) */
19
+ enabled?: boolean
20
+ /** Site-relative paths the crawl starts from (default: ['/']); absolute http(s) URLs are ignored */
21
+ seeds?: string[]
22
+ /** Max link depth to follow; seeds are depth 0 (default: 2) */
23
+ maxDepth?: number
24
+ /** Max pages to capture (default: 50) */
25
+ maxPages?: number
26
+ /** Number of pages loaded in parallel per batch (default: 3) */
27
+ concurrency?: number
28
+ /** Strip query strings from seeds and discovered links (default: true) */
29
+ stripQuery?: boolean
30
+ }
31
+
32
+ /** Controls how each route is loaded in Puppeteer before extraction. */ interface RenderOptions {
33
+ /** Puppeteer waitUntil option passed to page.goto() (default: 'networkidle2') */
34
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle0' | 'networkidle2'
35
+ /** Timeout in ms for page.goto() and for waitForSelector (default: 30000) */
36
+ timeoutMs?: number
37
+ /** Wait for this selector before extracting (recommended for SPAs; default: null) */
38
+ waitForSelector?: string | null
39
+ /** Extra delay after load in ms (default: 0) */
40
+ postLoadDelayMs?: number
41
+ /** Request URL patterns to abort (analytics, trackers): strings match by substring, RegExps by test(); a custom list replaces the defaults */
42
+ blockRequests?: (string | RegExp)[]
43
+ /** Puppeteer launch options (default: { headless: 'new' }) */
44
+ launchOptions?: LaunchOptions
45
+ /** Called before page.goto(); the result is awaited, so sync and async hooks both work */
46
+ beforeGoto?: (page: Page, ctx: { route: string }) => void | Promise<void>
47
+ /** Called after loading and before content extraction; the result is awaited, so sync and async hooks both work */
48
+ beforeExtract?: (page: Page, ctx: { route: string }) => void | Promise<void>
49
+ }
50
+
51
+ /** Chooses which part of the rendered page becomes Markdown. */ interface ExtractOptions {
52
+ /** CSS selectors for main content (first match wins); falls back to <main>, then <body> */
53
+ mainSelector?: string | string[]
54
+ /** CSS selectors removed from the page before the main content is picked; a custom list replaces the defaults */
55
+ removeSelectors?: string[]
56
+ }
57
+
58
+ /** Markdown conversion settings. */ interface MarkdownOptions {
59
+ /** Add YAML frontmatter with source/title/generated_at (default: true) */
60
+ addFrontmatter?: boolean
61
+ /** Options passed to the TurndownService constructor (defaults: 'atx' headings, 'fenced' code blocks, '_' emphasis) */
62
+ turndown?: {
63
+ headingStyle?: 'setext' | 'atx'
64
+ codeBlockStyle?: 'indented' | 'fenced'
65
+ emDelimiter?: '_' | '*'
66
+ }
67
+ }
68
+
69
+ /** Where the Markdown files and the llms.txt index are written. */ interface OutputOptions {
70
+ /** Output mode: 'sibling' (default) writes into outDir mirroring the route (/pricing -> pricing.md, /docs/ -> docs/index.html.md, / -> index.html.md); 'subdir' writes the same paths under `subdir` */
71
+ mode?: 'sibling' | 'subdir'
72
+ /** Subdirectory name inside outDir, used only when mode='subdir' (default: 'ai') */
73
+ subdir?: string
74
+ /** Index filename, written to the root of outDir (default: 'llms.txt') */
75
+ llmsTxtFileName?: string
76
+ /** H1 title in llms.txt; when null/empty falls back to the resolved config's env.mode if present, else 'Site' */
77
+ llmsTitle?: string | null
78
+ /** Summary blockquote in llms.txt */
79
+ llmsSummary?: string
80
+ /** Sort pages by route using localeCompare (default: true); when false, capture order is kept */
81
+ sort?: boolean
82
+ }
83
+
84
+ /** Top-level plugin options; plain objects are deep-merged over the built-in defaults, arrays replace them. */ interface LlmSpiderOptions {
85
+ /** Enable/disable plugin (default: true) */
86
+ enabled?: boolean
87
+ /** Explicit route list (recommended); with neither routes nor crawl mode, only '/' is captured */
88
+ routes?: RouteDef[]
89
+ /** Crawl mode options (off by default) */
90
+ crawl?: CrawlOptions
91
+ /** Routes to skip (default: ['/login', '/admin', '/account']): strings match by substring, RegExps by test() */
92
+ exclude?: (string | RegExp)[]
93
+ /** Page loading options (Puppeteer) */
94
+ render?: RenderOptions
95
+ /** Content extraction options */
96
+ extract?: ExtractOptions
97
+ /** Markdown generation options */
98
+ markdown?: MarkdownOptions
99
+ /** Output location and llms.txt options */
100
+ output?: OutputOptions
101
+ /** Log level (default: 'info'); warnings are printed unless 'silent' */
102
+ logLevel?: 'silent' | 'info' | 'debug'
103
+ }
104
+
105
+ /**
106
+ * Vite build plugin that serves the built site, renders each route with Puppeteer,
107
+ * and writes Markdown snapshots plus an llms.txt index into the output directory.
108
+ */
109
+ declare function llmSpiderPlugin(options?: LlmSpiderOptions): Plugin
110
+
111
+ export { type CrawlOptions, type ExtractOptions, type LlmSpiderOptions, type MarkdownOptions, type OutputOptions, type RenderOptions, type RouteDef, llmSpiderPlugin as default, llmSpiderPlugin };
@@ -0,0 +1,111 @@
1
+ import { Plugin } from 'vite';
2
+ import { LaunchOptions, Page } from 'puppeteer';
3
+
4
+ /** One page to snapshot, plus the metadata used for its llms.txt entry. */ interface RouteDef {
5
+ /** Site-relative URL path (e.g., '/pricing', '/docs/'); hash and query string are stripped, and an unusable value falls back to '/' */
6
+ path: string
7
+ /** Display title in llms.txt; when omitted, the rendered page's <title> text is used, or the route if that is empty */
8
+ title?: string
9
+ /** H2 section grouping in llms.txt (default: 'Pages') */
10
+ section?: string
11
+ /** If true, the page is listed under the "## Optional" section instead of its own section */
12
+ optional?: boolean
13
+ /** Appended after the link in llms.txt as ": <notes>" */
14
+ notes?: string
15
+ }
16
+
17
+ /** Settings for the optional link-following crawl. */ interface CrawlOptions {
18
+ /** Enable BFS crawl mode (default: false) */
19
+ enabled?: boolean
20
+ /** Site-relative paths the crawl starts from (default: ['/']); absolute http(s) URLs are ignored */
21
+ seeds?: string[]
22
+ /** Max link depth to follow; seeds are depth 0 (default: 2) */
23
+ maxDepth?: number
24
+ /** Max pages to capture (default: 50) */
25
+ maxPages?: number
26
+ /** Number of pages loaded in parallel per batch (default: 3) */
27
+ concurrency?: number
28
+ /** Strip query strings from seeds and discovered links (default: true) */
29
+ stripQuery?: boolean
30
+ }
31
+
32
+ /** Controls how each route is loaded in Puppeteer before extraction. */ interface RenderOptions {
33
+ /** Puppeteer waitUntil option passed to page.goto() (default: 'networkidle2') */
34
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle0' | 'networkidle2'
35
+ /** Timeout in ms for page.goto() and for waitForSelector (default: 30000) */
36
+ timeoutMs?: number
37
+ /** Wait for this selector before extracting (recommended for SPAs; default: null) */
38
+ waitForSelector?: string | null
39
+ /** Extra delay after load in ms (default: 0) */
40
+ postLoadDelayMs?: number
41
+ /** Request URL patterns to abort (analytics, trackers): strings match by substring, RegExps by test(); a custom list replaces the defaults */
42
+ blockRequests?: (string | RegExp)[]
43
+ /** Puppeteer launch options (default: { headless: 'new' }) */
44
+ launchOptions?: LaunchOptions
45
+ /** Called before page.goto(); the result is awaited, so sync and async hooks both work */
46
+ beforeGoto?: (page: Page, ctx: { route: string }) => void | Promise<void>
47
+ /** Called after loading and before content extraction; the result is awaited, so sync and async hooks both work */
48
+ beforeExtract?: (page: Page, ctx: { route: string }) => void | Promise<void>
49
+ }
50
+
51
+ /** Chooses which part of the rendered page becomes Markdown. */ interface ExtractOptions {
52
+ /** CSS selectors for main content (first match wins); falls back to <main>, then <body> */
53
+ mainSelector?: string | string[]
54
+ /** CSS selectors removed from the page before the main content is picked; a custom list replaces the defaults */
55
+ removeSelectors?: string[]
56
+ }
57
+
58
+ /** Markdown conversion settings. */ interface MarkdownOptions {
59
+ /** Add YAML frontmatter with source/title/generated_at (default: true) */
60
+ addFrontmatter?: boolean
61
+ /** Options passed to the TurndownService constructor (defaults: 'atx' headings, 'fenced' code blocks, '_' emphasis) */
62
+ turndown?: {
63
+ headingStyle?: 'setext' | 'atx'
64
+ codeBlockStyle?: 'indented' | 'fenced'
65
+ emDelimiter?: '_' | '*'
66
+ }
67
+ }
68
+
69
+ /** Where the Markdown files and the llms.txt index are written. */ interface OutputOptions {
70
+ /** Output mode: 'sibling' (default) writes into outDir mirroring the route (/pricing -> pricing.md, /docs/ -> docs/index.html.md, / -> index.html.md); 'subdir' writes the same paths under `subdir` */
71
+ mode?: 'sibling' | 'subdir'
72
+ /** Subdirectory name inside outDir, used only when mode='subdir' (default: 'ai') */
73
+ subdir?: string
74
+ /** Index filename, written to the root of outDir (default: 'llms.txt') */
75
+ llmsTxtFileName?: string
76
+ /** H1 title in llms.txt; when null/empty falls back to the resolved config's env.mode if present, else 'Site' */
77
+ llmsTitle?: string | null
78
+ /** Summary blockquote in llms.txt */
79
+ llmsSummary?: string
80
+ /** Sort pages by route using localeCompare (default: true); when false, capture order is kept */
81
+ sort?: boolean
82
+ }
83
+
84
+ /** Top-level plugin options; plain objects are deep-merged over the built-in defaults, arrays replace them. */ interface LlmSpiderOptions {
85
+ /** Enable/disable plugin (default: true) */
86
+ enabled?: boolean
87
+ /** Explicit route list (recommended); with neither routes nor crawl mode, only '/' is captured */
88
+ routes?: RouteDef[]
89
+ /** Crawl mode options (off by default) */
90
+ crawl?: CrawlOptions
91
+ /** Routes to skip (default: ['/login', '/admin', '/account']): strings match by substring, RegExps by test() */
92
+ exclude?: (string | RegExp)[]
93
+ /** Page loading options (Puppeteer) */
94
+ render?: RenderOptions
95
+ /** Content extraction options */
96
+ extract?: ExtractOptions
97
+ /** Markdown generation options */
98
+ markdown?: MarkdownOptions
99
+ /** Output location and llms.txt options */
100
+ output?: OutputOptions
101
+ /** Log level (default: 'info'); warnings are printed unless 'silent' */
102
+ logLevel?: 'silent' | 'info' | 'debug'
103
+ }
104
+
105
+ /**
106
+ * Vite build plugin that serves the built site, renders each route with Puppeteer,
107
+ * and writes Markdown snapshots plus an llms.txt index into the output directory.
108
+ */
109
+ declare function llmSpiderPlugin(options?: LlmSpiderOptions): Plugin
110
+
111
+ export { type CrawlOptions, type ExtractOptions, type LlmSpiderOptions, type MarkdownOptions, type OutputOptions, type RenderOptions, type RouteDef, llmSpiderPlugin as default, llmSpiderPlugin };