docmunch 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs ADDED
@@ -0,0 +1,1619 @@
1
+ #!/usr/bin/env node
2
// These look like bundler-emitted helpers rather than hand-written code.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
// Lazy one-shot initializer: `fn` is an object whose first own property is the
// module body. The first __init() call runs that body (passing `fn = 0`, which
// also clears `fn` so the body never runs again) and caches the result in
// `res`; later calls skip the body and return the cached `res`.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines each key of `all` on `target` as an enumerable getter property,
// using the function stored under that key as the getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
11
+
12
+ // src/mcp/loader.ts
13
+ import { readFileSync as readFileSync3 } from "fs";
14
+ import { join as join6 } from "path";
15
+ import matter2 from "gray-matter";
16
/**
 * Load every documentation source and page found under `docsDir`.
 *
 * Reads `<docsDir>/manifest.json`, then each source's `_index.json`, then each
 * page file listed there (front matter parsed with gray-matter).
 * Loading is best-effort: an unreadable or malformed root manifest yields an
 * empty result, and unreadable/malformed sources or pages are skipped.
 *
 * @param {string} docsDir - Directory containing `manifest.json`.
 * @returns {{ sources: object[], pages: object[] }} Loaded sources and pages.
 */
function loadDocs(docsDir) {
  const sources = [];
  const pages = [];
  let rootManifest;
  try {
    const raw = readFileSync3(join6(docsDir, "manifest.json"), "utf-8");
    rootManifest = JSON.parse(raw);
  } catch {
    return { sources: [], pages: [] };
  }
  // A manifest that parses but has no `sources` array is treated as empty
  // instead of crashing the loader.
  const sourceEntries = Array.isArray(rootManifest?.sources) ? rootManifest.sources : [];
  for (const sourceEntry of sourceEntries) {
    let sourceDir;
    let sourceManifest;
    try {
      // join() throws on a missing/non-string path, so it lives inside the try.
      sourceDir = join6(docsDir, sourceEntry.path);
      const raw = readFileSync3(join6(sourceDir, "_index.json"), "utf-8");
      sourceManifest = JSON.parse(raw);
    } catch {
      continue;
    }
    if (sourceManifest === null || typeof sourceManifest !== "object") {
      continue;
    }
    const pageEntries = Array.isArray(sourceManifest.pages) ? sourceManifest.pages : [];
    let pageCount = 0;
    for (const pageEntry of pageEntries) {
      try {
        const raw = readFileSync3(join6(sourceDir, pageEntry.path), "utf-8");
        const parsed = matter2(raw);
        pages.push({
          source: sourceManifest.name,
          path: pageEntry.path,
          title: pageEntry.title,
          url: String(parsed.data.source || ""),
          platform: String(parsed.data.platform || sourceManifest.platform),
          fetchedAt: String(parsed.data.fetched_at || sourceManifest.fetched_at),
          content: parsed.content.trim()
        });
        pageCount++;
      } catch {
        // Best-effort: a page that cannot be read or parsed is left out.
        continue;
      }
    }
    const loadedSource = {
      name: sourceManifest.name,
      url: sourceManifest.url,
      platform: sourceManifest.platform,
      fetchedAt: sourceManifest.fetched_at,
      pageCount
    };
    if (sourceManifest.display_name) loadedSource.displayName = sourceManifest.display_name;
    if (sourceManifest.description) loadedSource.description = sourceManifest.description;
    if (sourceManifest.icon_url !== void 0) loadedSource.iconUrl = sourceManifest.icon_url;
    sources.push(loadedSource);
  }
  return { sources, pages };
}
68
// Lazy-init wrapper for src/mcp/loader.ts. The wrapped body holds only the
// "use strict" directive, so running it performs no other work.
var init_loader = __esm({
  "src/mcp/loader.ts"() {
    "use strict";
  }
});
73
+
74
+ // src/mcp/search.ts
75
+ import MiniSearch from "minisearch";
76
/**
 * Build an in-memory full-text index over the loaded pages.
 *
 * Titles and content are indexed; `source`, `path`, `title` and `url` are
 * stored so they can be returned with each hit.
 *
 * @param {object[]} pages - Pages as produced by the loader.
 * @returns {{ search(query: string, options?: { source?: string, limit?: number }): object[] }}
 *   An object whose `search` returns up to `limit` (default 10) hits, each with
 *   `source`, `path`, `title`, `url` and `score`.
 */
function buildSearchIndex(pages) {
  const miniSearch = new MiniSearch({
    fields: ["title", "content"],
    storeFields: ["source", "path", "title", "url"],
    idField: "id"
  });
  // The synthetic id is written last so a page that happens to carry its own
  // `id` field cannot produce duplicate or clashing document ids.
  const documents = pages.map((page, i) => ({
    ...page,
    id: String(i)
  }));
  miniSearch.addAll(documents);
  return {
    search(query, options) {
      if (!query.trim()) return [];
      const filter = options?.source ? (result) => result.source === options.source : void 0;
      const results = miniSearch.search(query, {
        prefix: true,
        fuzzy: 0.2,
        filter
      });
      // Clamp to a non-negative whole number: a negative limit passed straight
      // to slice() would silently drop hits from the END of the list instead
      // of limiting the result count.
      const limit = Math.max(0, Math.floor(options?.limit ?? 10));
      return results.slice(0, limit).map((r) => ({
        source: r.source,
        path: r.path,
        title: r.title,
        url: r.url,
        score: r.score
      }));
    }
  };
}
107
// Lazy-init wrapper for src/mcp/search.ts. The wrapped body holds only the
// "use strict" directive, so running it performs no other work.
var init_search = __esm({
  "src/mcp/search.ts"() {
    "use strict";
  }
});
112
+
113
+ // src/mcp/server.ts
114
// Export namespace object for src/mcp/server.ts: `createMcpServer` is exposed
// through a getter, so it is looked up when read rather than captured here.
var server_exports = {};
__export(server_exports, {
  createMcpServer: () => createMcpServer
});
118
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
119
+ import { z } from "zod";
120
/**
 * Create an MCP server that serves the documentation stored under `docsDir`.
 *
 * Docs are loaded and indexed once, up front. Four tools are registered:
 * `list_sources`, `list_pages`, `read_page` and `search_docs`.
 *
 * @param {string} docsDir - Directory previously populated with fetched docs.
 * @returns {McpServer} The configured (not yet connected) server.
 */
function createMcpServer(docsDir) {
  const docs = loadDocs(docsDir);
  const searchIndex = buildSearchIndex(docs.pages);
  const server = new McpServer({ name: "doc2ctx", version: "0.1.0" });

  // Small builders for the tool result envelopes used below.
  const textReply = (text) => ({ content: [{ type: "text", text }] });
  const errorReply = (text) => ({ content: [{ type: "text", text }], isError: true });
  const jsonReply = (value) => textReply(JSON.stringify(value, null, 2));

  server.tool(
    "list_sources",
    "List all documentation sources available in the docs directory",
    {},
    async () => {
      const summaries = docs.sources.map((s) => {
        const summary = {
          name: s.name,
          url: s.url,
          platform: s.platform,
          fetchedAt: s.fetchedAt,
          pageCount: s.pageCount
        };
        // Optional metadata is only emitted when present.
        if (s.displayName) summary.displayName = s.displayName;
        if (s.description) summary.description = s.description;
        if (s.iconUrl !== void 0) summary.iconUrl = s.iconUrl;
        return summary;
      });
      return jsonReply(summaries);
    }
  );

  server.tool(
    "list_pages",
    "List all pages for a specific documentation source",
    { source: z.string().describe("Name of the documentation source") },
    async ({ source }) => {
      const known = docs.sources.some((s) => s.name === source);
      if (!known) {
        return errorReply(`Source "${source}" not found. Use list_sources to see available sources.`);
      }
      const listing = [];
      for (const p of docs.pages) {
        if (p.source === source) listing.push({ title: p.title, path: p.path });
      }
      return jsonReply(listing);
    }
  );

  server.tool(
    "read_page",
    "Read the full markdown content of a documentation page",
    {
      source: z.string().describe("Name of the documentation source"),
      path: z.string().describe("Path of the page within the source (from list_pages)")
    },
    async ({ source, path }) => {
      const hit = docs.pages.find((p) => p.source === source && p.path === path);
      return hit
        ? textReply(hit.content)
        : errorReply(`Page "${path}" not found in source "${source}". Use list_pages to see available pages.`);
    }
  );

  server.tool(
    "search_docs",
    "Search across all documentation pages by keyword",
    {
      query: z.string().describe("Search query"),
      source: z.string().optional().describe("Filter results to a specific source"),
      limit: z.number().optional().describe("Maximum number of results (default 10)")
    },
    async ({ query, source, limit }) => {
      if (!query.trim()) {
        return errorReply("Search query cannot be empty.");
      }
      const hits = searchIndex.search(query, { source, limit });
      // Only these four fields are reported back to the client.
      const slim = hits.map(({ source: hitSource, path, title, score }) => ({
        source: hitSource,
        path,
        title,
        score
      }));
      return jsonReply(slim);
    }
  );

  return server;
}
230
// Lazy-init wrapper for src/mcp/server.ts: running it triggers the loader and
// search module initializers that the server code relies on.
var init_server = __esm({
  "src/mcp/server.ts"() {
    "use strict";
    init_loader();
    init_search();
  }
});
237
+
238
+ // src/cli.ts
239
+ import { defineCommand as defineCommand6, runMain, runCommand } from "citty";
240
+ import consola6 from "consola";
241
+
242
+ // src/commands/fetch.ts
243
+ import { defineCommand } from "citty";
244
+ import { dirname as dirname3 } from "path";
245
+ import consola2 from "consola";
246
+ import * as cheerio6 from "cheerio";
247
+
248
+ // src/pipeline/fetcher.ts
249
+ import { ofetch } from "ofetch";
250
+ import { execSync } from "child_process";
251
+ import consola from "consola";
252
// HTTP statuses for which a failed static fetch is retried in a real browser.
const BROWSER_RETRY_CODES = /* @__PURE__ */ new Set([403, 406, 429]);

// Lower-case phrases whose presence marks a page as a bot-protection interstitial.
const CHALLENGE_PATTERNS = [
  "verify you are human",
  "just a moment",
  "checking your browser",
  "attention required",
  "enable javascript and cookies"
];

/**
 * Report whether the HTML contains any known challenge phrase (case-insensitive).
 *
 * @param {string} html - Page markup to inspect.
 * @returns {boolean} True when at least one challenge phrase is present.
 */
function isChallengeContent(html) {
  const haystack = html.toLowerCase();
  for (const phrase of CHALLENGE_PATTERNS) {
    if (haystack.includes(phrase)) return true;
  }
  return false;
}
264
/**
 * Fetch a page as text, falling back to a browser for selected HTTP errors.
 *
 * @param {string} url - Page to download.
 * @returns {Promise<string>} The page HTML.
 * @throws Re-throws the original fetch error when its status is not one of
 *   the browser-retry codes.
 */
async function fetchPage(url) {
  let failure;
  try {
    return await ofetch(url, { responseType: "text" });
  } catch (err) {
    failure = err;
  }
  // The status may sit on the response or directly on the error.
  const status = failure?.response?.status ?? failure?.statusCode;
  const retryInBrowser = Boolean(status) && BROWSER_RETRY_CODES.has(status);
  if (!retryInBrowser) {
    throw failure;
  }
  consola.warn(
    `Static fetch returned ${status}, retrying with browser...`
  );
  return fetchWithBrowser(url);
}
278
/**
 * Fetch a page through Playwright, headless first.
 * If the headless result looks like a bot challenge, one more attempt is made
 * with a visible browser and that result is returned as-is.
 *
 * @param {string} url - Page to load.
 * @returns {Promise<string>} The rendered page HTML.
 */
async function fetchWithBrowser(url) {
  const playwright = await loadPlaywright();
  const headlessHtml = await launchAndFetch(playwright, url, true);
  if (!isChallengeContent(headlessHtml)) {
    return headlessHtml;
  }
  consola.warn("Bot protection detected, retrying with visible browser...");
  return launchAndFetch(playwright, url, false);
}
287
/**
 * Import Playwright, attempting a one-time automatic install if it is missing.
 *
 * @returns {Promise<object>} The imported `playwright` module.
 * @throws {Error} With `code === "ERR_PLAYWRIGHT_NOT_INSTALLED"` when the
 *   install or the second import fails; the underlying failure is attached as
 *   `cause` so the real reason (network, permissions, ...) is not lost.
 */
async function loadPlaywright() {
  try {
    return await import("playwright");
  } catch {
    // First import failed: assume the package is absent and try to install it.
    consola.info(
      "This site requires a browser to fetch. Installing Playwright..."
    );
    try {
      execSync("npm install -g playwright", { stdio: "inherit" });
      execSync("npx playwright install chromium", { stdio: "inherit" });
      return await import("playwright");
    } catch (installError) {
      const err = new Error(
        "Failed to auto-install Playwright. Install it manually:\n\n npm install -g playwright && npx playwright install chromium\n"
      );
      err.code = "ERR_PLAYWRIGHT_NOT_INSTALLED";
      // Assigned as a property (like `code`) so it works on every runtime.
      err.cause = installError;
      throw err;
    }
  }
}
307
/**
 * Launch Chromium, load `url`, wait briefly, and return the page markup.
 * The browser is always closed, even when navigation fails.
 *
 * @param {object} playwright - Imported Playwright module.
 * @param {string} url - Page to open.
 * @param {boolean} headless - Whether to run without a visible window.
 * @returns {Promise<string>} Serialized page content.
 */
async function launchAndFetch(playwright, url, headless) {
  const chromium = await playwright.chromium.launch({ headless });
  try {
    const tab = await chromium.newPage();
    await tab.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 });
    // Fixed pause after DOMContentLoaded before the markup is read.
    await tab.waitForTimeout(2000);
    const markup = await tab.content();
    return markup;
  } finally {
    await chromium.close();
  }
}
318
+
319
+ // src/pipeline/extractor.ts
320
+ import * as cheerio3 from "cheerio";
321
+ import { Readability } from "@mozilla/readability";
322
+ import { parseHTML } from "linkedom";
323
+
324
+ // src/platforms/mintlify.ts
325
+ import * as cheerio from "cheerio";
326
// Platform strategy for Mintlify-generated documentation sites.
const mintlify = {
  id: "mintlify",

  /** True when any Mintlify marker (generator meta, script src, data attribute) is present. */
  detect(url, $) {
    const markers = [
      'meta[name="generator"][content*="Mintlify"]',
      "script[src*='mintlify']",
      "[data-mintlify]"
    ];
    return markers.some((selector) => $(selector).length > 0);
  },

  /** Selector for the main content container. */
  contentSelector() {
    return "article, main";
  },

  /** Selectors for page chrome to strip before extracting content. */
  removeSelectors() {
    return [
      "nav",
      "header",
      "footer",
      "[role='navigation']",
      ".sidebar",
      "[class*='sidebar']",
      "[class*='cookie']",
      "[class*='banner']",
      "script",
      "style"
    ];
  },

  /** Selector for navigation links. */
  navLinkSelector() {
    return "nav a[href], .sidebar a[href], [class*='sidebar'] a[href]";
  },

  /**
   * Collect page URLs from `"href": "/path"` pairs (optionally backslash-escaped)
   * found inside script tags, resolved under the first path segment of `baseUrl`.
   */
  discoverUrls(html, baseUrl) {
    const $ = cheerio.load(html);
    const found = /* @__PURE__ */ new Set();
    $("script").each((_, el) => {
      const source = $(el).html() || "";
      const hrefPattern = /\\?"href\\?"\s*:\s*\\?"(\/[a-z0-9][a-z0-9\/-]*)\\?"/g;
      for (const hit of source.matchAll(hrefPattern)) {
        found.add(hit[1]);
      }
    });
    const { origin, pathname } = new URL(baseUrl);
    const basePath = pathname.split("/").slice(0, 2).join("/");
    return Array.from(found, (p) =>
      p.startsWith(basePath) ? `${origin}${p}` : `${origin}${basePath}${p}`
    );
  }
};
376
+
377
+ // src/platforms/docusaurus.ts
378
// Platform strategy for Docusaurus-generated documentation sites.
const docusaurus = {
  id: "docusaurus",

  /** True when a Docusaurus generator tag, sidebar container, or locale meta is present. */
  detect(url, $) {
    const markers = [
      'meta[name="generator"][content*="Docusaurus"]',
      ".theme-doc-sidebar-container",
      'meta[name="docusaurus_locale"]'
    ];
    return markers.some((selector) => $(selector).length > 0);
  },

  /** Selector for the main content container. */
  contentSelector() {
    return "article, [role='main'], .theme-doc-markdown";
  },

  /** Selectors for page chrome to strip before extracting content. */
  removeSelectors() {
    return [
      ".navbar",
      "footer",
      ".theme-doc-toc-desktop",
      ".theme-doc-sidebar-container",
      ".pagination-nav",
      ".theme-doc-breadcrumbs",
      "nav",
      "script",
      "style"
    ];
  },

  /** Selector for sidebar menu links. */
  navLinkSelector() {
    return ".menu__link[href]";
  }
};
407
+
408
+ // src/platforms/readme.ts
409
// Platform strategy for ReadMe-hosted documentation sites.
const readme = {
  id: "readme",

  /**
   * True when more than two elements carry an `rm-`-prefixed class token, or
   * when a `.rm-Article` / `.rm-Markdown` element exists.
   */
  detect(url, $) {
    const rmToken = /\brm-/;
    let tagged = 0;
    $("[class]").each((_, el) => {
      const classAttr = $(el).attr("class") || "";
      if (rmToken.test(classAttr)) tagged += 1;
    });
    if (tagged > 2) return true;
    return [".rm-Article", ".rm-Markdown"].some((selector) => $(selector).length > 0);
  },

  /** Selector for the main content container. */
  contentSelector() {
    return ".markdown-body, .rm-Article, .rm-Markdown";
  },

  /** Selectors for page chrome to strip before extracting content. */
  removeSelectors() {
    return [
      "nav",
      "header",
      "footer",
      ".rm-Sidebar",
      ".rm-TableOfContents",
      "[class*='cookie']",
      "script",
      "style"
    ];
  },

  /** Selector for sidebar links. */
  navLinkSelector() {
    return ".rm-Sidebar a[href]";
  }
};
441
+
442
+ // src/platforms/gitbook.ts
443
// Platform strategy for GitBook-hosted documentation sites.
const gitbook = {
  id: "gitbook",

  /**
   * True when a GitBook generator tag is present, the host ends with
   * `.gitbook.io`, or the GitBook content-editor test id exists.
   */
  detect(url, $) {
    if ($('meta[name="generator"][content*="GitBook"]').length > 0) return true;
    let onGitbookHost = false;
    try {
      onGitbookHost = new URL(url).hostname.endsWith(".gitbook.io");
    } catch {
      // An unparsable URL simply does not count as a GitBook host.
    }
    if (onGitbookHost) return true;
    return $('[data-testid="page.contentEditor"]').length > 0;
  },

  /** Selector for the main content container. */
  contentSelector() {
    return '[data-testid="page.contentEditor"], main, article';
  },

  /** Selectors for page chrome to strip before extracting content. */
  removeSelectors() {
    return [
      "nav",
      "header",
      "footer",
      "[class*='sidebar']",
      "[class*='toc']",
      "[class*='cookie']",
      "script",
      "style"
    ];
  },

  /** Selector for navigation links. */
  navLinkSelector() {
    return "nav a[href], aside a[href]";
  }
};
474
+
475
+ // src/platforms/generic.ts
476
+ import * as cheerio2 from "cheerio";
477
// Candidate selectors for sidebar/TOC links, tried in this order.
const SIDEBAR_SELECTORS = [
  "aside nav a[href]",
  "aside a[href]",
  '[class*="sidebar"] a[href]',
  '[class*="side-bar"] a[href]',
  '[role="complementary"] a[href]',
  '[class*="toc"] a[href]',
  '[class*="table-of-contents"] a[href]'
];

// Minimum number of links a selector must yield to be accepted as a sidebar.
const MIN_SIDEBAR_LINKS = 3;
487
/**
 * Resolve the `href` of every element matching `selector` to an absolute URL.
 *
 * Only http(s) targets are returned: fragment-only links and non-web schemes
 * (`mailto:`, `tel:`, `javascript:`, in any letter case) are not pages and
 * would otherwise inflate the link count used to recognize a sidebar.
 *
 * @param {Function} $ - Cheerio-style query function.
 * @param {string} selector - Selector for the anchor elements.
 * @param {string} baseUrl - Base used to resolve relative hrefs.
 * @param {object} [scope] - Optional selection to search within instead of the whole document.
 * @returns {string[]} De-duplicated absolute URLs in document order.
 */
function resolveLinks($, selector, baseUrl, scope) {
  const links = /* @__PURE__ */ new Set();
  const els = scope ? scope.find(selector) : $(selector);
  els.each((_, el) => {
    const href = $(el).attr("href");
    if (!href || href.startsWith("#")) return;
    let resolved;
    try {
      resolved = new URL(href, baseUrl);
    } catch {
      // Unparsable hrefs are skipped.
      return;
    }
    if (resolved.protocol !== "http:" && resolved.protocol !== "https:") return;
    links.add(resolved.href);
  });
  return [...links];
}
500
// Fallback platform strategy used when no specific platform is detected.
const generic = {
  id: "generic",

  /** Always matches, so this strategy accepts any page. */
  detect(_url, _$) {
    return true;
  },

  /** Selector for the main content container. */
  contentSelector() {
    return "article, main, [role='main'], .content";
  },

  /** Selectors for page chrome to strip before extracting content. */
  removeSelectors() {
    return [
      "nav",
      "header",
      "footer",
      "[role='navigation']",
      "[class*='sidebar']",
      "[class*='cookie']",
      "[class*='banner']",
      "script",
      "style",
      "noscript"
    ];
  },

  /** No dedicated navigation selector for unknown sites. */
  navLinkSelector() {
    return null;
  },

  /**
   * Return the links of the first sidebar-like selector that yields at least
   * MIN_SIDEBAR_LINKS links, or an empty list when none does.
   */
  discoverUrls(html, baseUrl) {
    const $ = cheerio2.load(html);
    const candidates = SIDEBAR_SELECTORS.map((selector) => () => resolveLinks($, selector, baseUrl));
    for (const collect of candidates) {
      const found = collect();
      if (found.length >= MIN_SIDEBAR_LINKS) return found;
    }
    return [];
  }
};
536
+
537
+ // src/platforms/registry.ts
538
// Known platform strategies. Order matters: `resolve` returns the first
// strategy whose detect() matches, and `generic` (whose detect() always
// returns true) must therefore stay last.
var platformStrategies = [
  mintlify,
  docusaurus,
  readme,
  gitbook,
  generic
];
545
/**
 * Look up a platform strategy by its id.
 *
 * @param {string} id - Platform identifier.
 * @returns {object} The matching strategy.
 * @throws {Error} When no strategy has that id.
 */
function getStrategy(id) {
  for (const candidate of platformStrategies) {
    if (candidate.id === id) return candidate;
  }
  throw new Error(`Unknown platform: ${id}`);
}
552
+
553
+ // src/pipeline/resolver.ts
554
+ function resolve(url, $) {
555
+ for (const strategy of platformStrategies) {
556
+ if (strategy.detect(url, $)) {
557
+ return strategy.id;
558
+ }
559
+ }
560
+ return "generic";
561
+ }
562
+
563
+ // src/pipeline/extractor.ts
564
/**
 * Extract the main content, title and platform from a page's HTML.
 *
 * For a recognized platform the strategy's selectors are tried first; if that
 * yields fewer than 100 characters (or for generic pages) Readability is used,
 * falling back to the body markup and finally the raw HTML.
 *
 * @param {string} html - Raw page markup.
 * @param {string} url - Page URL, used for platform detection.
 * @returns {{ content: string, title: string, platform: string }}
 */
function extract(html, url) {
  const $ = cheerio3.load(html);
  const platform = resolve(url, $);
  const strategy = getStrategy(platform);
  const title = extractTitle($);

  if (platform !== "generic") {
    strategy.removeSelectors().forEach((selector) => {
      $(selector).remove();
    });
    const selectorContent = $(strategy.contentSelector()).first().html();
    const isSubstantial = Boolean(selectorContent) && selectorContent.trim().length >= 100;
    if (isSubstantial) {
      return { content: selectorContent, title, platform };
    }
  }

  let article = null;
  try {
    article = new Readability(parseHTML(html).document).parse();
  } catch {
    // Readability failures fall through to the body/raw-HTML fallback below.
  }
  return {
    content: article?.content || $("body").html() || html,
    title: title || article?.title || "",
    platform
  };
}
593
/**
 * Pick a page title: first `h1` text, then `og:title`, then the `title` tag.
 *
 * @param {Function} $ - Cheerio-style query function for the page.
 * @returns {string} The trimmed title (possibly empty).
 */
function extractTitle($) {
  const heading = $("h1").first().text().trim();
  const openGraph = heading ? void 0 : $('meta[property="og:title"]').attr("content")?.trim();
  return heading || openGraph || $("title").text().trim();
}
600
+
601
+ // src/pipeline/transformer.ts
602
+ import TurndownService from "turndown";
603
+ import { gfm } from "turndown-plugin-gfm";
604
/**
 * Convert extracted HTML to Markdown (ATX headings, fenced code, "-" bullets)
 * with GFM support and the custom callout/tab/code/hidden-element rules.
 *
 * @param {string} html - HTML fragment to convert.
 * @returns {string} Markdown text.
 */
function transform(html) {
  const service = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    bulletListMarker: "-"
  });
  service.use(gfm);
  const customRules = [
    addCalloutRule,
    addTabbedContentRule,
    addCodeBlockLangRule,
    addHiddenElementRule
  ];
  for (const register of customRules) {
    register(service);
  }
  return service.turndown(html);
}
617
/**
 * Tell whether a DOM node is an element node (nodeType 1).
 *
 * @param {{ nodeType: number }} node - Node to test.
 * @returns {boolean}
 */
function isElement(node) {
  const ELEMENT_NODE = 1;
  return node.nodeType === ELEMENT_NODE;
}
620
/**
 * Read an attribute from a node, yielding "" for non-elements or missing/empty values.
 *
 * @param {object} node - DOM node.
 * @param {string} attr - Attribute name.
 * @returns {string}
 */
function getAttr(node, attr) {
  return isElement(node) ? node.getAttribute(attr) || "" : "";
}
626
/**
 * Return a node's tag name in lower case, or "" for non-element nodes.
 *
 * @param {object} node - DOM node.
 * @returns {string}
 */
function getTagName(node) {
  return isElement(node) ? node.tagName.toLowerCase() : "";
}
632
/**
 * Register a rule that renders callouts/admonitions as labelled blockquotes.
 *
 * An element counts as a callout when it is an `aside`, has a callout-like
 * class token, or has `role="alert"`. The label is derived from the class.
 *
 * @param {object} td - Turndown service to extend.
 */
function addCalloutRule(td) {
  const calloutClass = /\b(admonition|callout|alert|notice|warning|info|tip|note|caution|danger)\b/i;
  // Checked in order; the first matching pattern decides the label.
  const labels = [
    [/warning|caution/, "Warning"],
    [/danger|error/, "Danger"],
    [/tip|success/, "Tip"],
    [/info/, "Info"]
  ];
  td.addRule("callouts", {
    filter(node) {
      if (!isElement(node)) return false;
      return (
        getTagName(node) === "aside" ||
        calloutClass.test(getAttr(node, "class")) ||
        getAttr(node, "role") === "alert"
      );
    },
    replacement(content, node) {
      const cls = getAttr(node, "class").toLowerCase();
      const matched = labels.find(([pattern]) => pattern.test(cls));
      const type = matched ? matched[1] : "Note";
      const quoted = content
        .trim()
        .split("\n")
        .map((line) => `> ${line}`)
        .join("\n");
      return `\n> **${type}**\n${quoted}\n\n`;
    }
  });
}
663
/**
 * Register a rule that flattens tab panels into plain sections, prefixed with
 * a bold label when one is available on the panel.
 *
 * @param {object} td - Turndown service to extend.
 */
function addTabbedContentRule(td) {
  const panelClass = /\b(tab-panel|tabpanel|tabs__item)\b/i;
  td.addRule("tabbed-content", {
    filter(node) {
      if (!isElement(node)) return false;
      return panelClass.test(getAttr(node, "class")) || getAttr(node, "role") === "tabpanel";
    },
    replacement(content, node) {
      const body = content.trim();
      // The first non-empty attribute wins.
      const label =
        ["aria-label", "data-label", "data-value"]
          .map((name) => getAttr(node, name))
          .find((value) => value) || "";
      return label ? `\n**${label}**\n\n${body}\n\n` : `\n${body}\n\n`;
    }
  });
}
689
/**
 * Register a rule that emits fenced code blocks carrying the language found
 * in `data-language` / `data-lang` on the `pre` or its inner `code` element.
 *
 * Compared with a naive fence, this strips the single trailing newline of the
 * code (avoiding a blank line before the closing fence) and lengthens the
 * fence when the code itself contains a line starting with three or more
 * backticks, so embedded fences cannot terminate the block early.
 *
 * @param {object} td - Turndown service to extend.
 */
function addCodeBlockLangRule(td) {
  // Shared by filter and replacement so both agree on where the language comes from.
  const languageOf = (pre, codeEl) =>
    getAttr(pre, "data-language") ||
    getAttr(pre, "data-lang") ||
    codeEl.getAttribute("data-language") ||
    codeEl.getAttribute("data-lang") ||
    "";
  td.addRule("code-block-lang", {
    filter(node) {
      if (!isElement(node)) return false;
      if (getTagName(node) !== "pre") return false;
      const codeEl = node.querySelector("code");
      if (!codeEl) return false;
      return languageOf(node, codeEl).length > 0;
    },
    replacement(_content, node) {
      if (!isElement(node)) return _content;
      const codeEl = node.querySelector("code");
      if (!codeEl) return _content;
      const lang = languageOf(node, codeEl);
      const code = (codeEl.textContent || "").replace(/\n$/, "");
      let fence = "```";
      for (const inner of code.matchAll(/^`{3,}/gm)) {
        if (inner[0].length >= fence.length) {
          fence = "`".repeat(inner[0].length + 1);
        }
      }
      return `\n${fence}${lang}\n${code}\n${fence}\n`;
    }
  });
}
712
/**
 * Register a rule that drops elements hidden with an inline `display: none`,
 * except tab panels (which are kept so inactive tabs are still converted).
 *
 * @param {object} td - Turndown service to extend.
 */
function addHiddenElementRule(td) {
  const hiddenStyle = /display\s*:\s*none/i;
  const panelClass = /\b(tab-panel|tabpanel)\b/i;
  td.addRule("hidden-elements", {
    filter(node) {
      if (!isElement(node)) return false;
      const isHidden = hiddenStyle.test(getAttr(node, "style"));
      if (!isHidden) return false;
      const isTabPanel =
        panelClass.test(getAttr(node, "class")) || getAttr(node, "role") === "tabpanel";
      return !isTabPanel;
    },
    replacement() {
      return "";
    }
  });
}
728
+
729
+ // src/pipeline/writer.ts
730
+ import { writeFileSync, mkdirSync } from "fs";
731
+ import { dirname, join } from "path";
732
+ import matter from "gray-matter";
733
+
734
+ // src/utils/slug.ts
735
/**
 * Derive the relative Markdown file path for a page URL.
 *
 * The crawl prefix is stripped from the URL path only when it matches on a
 * path-segment boundary, so a prefix of `/docs` no longer eats the start of a
 * sibling such as `/docs-v2/page` (which previously became `-v2/page.md`).
 *
 * @param {string} pageUrl - Absolute URL of the page.
 * @param {string} basePrefix - Path prefix shared by the crawl (e.g. "/docs/").
 * @returns {string} Relative path ending in ".md"; "index.md" for the prefix root.
 */
function filePathForPage(pageUrl, basePrefix) {
  const parsed = new URL(pageUrl);
  let pathname = parsed.pathname.replace(/\/+$/, "");
  const normalizedPrefix = basePrefix.replace(/\/+$/, "");
  const isUnderPrefix =
    normalizedPrefix === "" ||
    pathname === normalizedPrefix ||
    pathname.startsWith(`${normalizedPrefix}/`);
  if (isUnderPrefix) {
    pathname = pathname.slice(normalizedPrefix.length);
  }
  pathname = pathname.replace(/^\/+/, "");
  if (!pathname) return "index.md";
  return `${pathname}.md`;
}
746
+
747
+ // src/pipeline/writer.ts
748
/**
 * Serialize a page as Markdown with YAML front matter and emit it.
 *
 * @param {string} markdown - Page body.
 * @param {string|undefined} outputPath - Destination file; when falsy the
 *   document is written to stdout instead.
 * @param {{ sourceUrl: string, platform: string, title: string }} options - Front matter values.
 */
function write(markdown, outputPath, options) {
  const frontMatter = {
    source: options.sourceUrl,
    fetched_at: (/* @__PURE__ */ new Date()).toISOString(),
    platform: options.platform,
    title: options.title,
    docs2ai_version: "0.1.0"
  };
  const document = matter.stringify(markdown, frontMatter);
  if (!outputPath) {
    process.stdout.write(document);
    return;
  }
  // Create missing parent directories before writing.
  mkdirSync(dirname(outputPath), { recursive: true });
  writeFileSync(outputPath, document, "utf-8");
}
763
/**
 * Write one page to `filePath` as Markdown with YAML front matter, creating
 * parent directories as needed.
 *
 * @param {string} markdown - Page body.
 * @param {string} filePath - Destination file.
 * @param {{ sourceUrl: string, platform: string, title: string }} options - Front matter values.
 */
function writePage(markdown, filePath, options) {
  const { sourceUrl, platform, title } = options;
  const document = matter.stringify(markdown, {
    source: sourceUrl,
    fetched_at: (/* @__PURE__ */ new Date()).toISOString(),
    platform,
    title,
    docs2ai_version: "0.1.0"
  });
  const parentDir = dirname(filePath);
  mkdirSync(parentDir, { recursive: true });
  writeFileSync(filePath, document, "utf-8");
}
774
/**
 * Write every crawled page under `outputDir` and return the manifest entries.
 *
 * When two pages map to the same relative path, later ones get a numeric
 * suffix (`-2`, `-3`, ...) so nothing is overwritten.
 *
 * @param {Array<{ url: string, markdown: string, title: string, platform: string }>} pages
 * @param {string} outputDir - Directory receiving the page files.
 * @param {string} basePrefix - Path prefix stripped from each page URL.
 * @returns {Array<{ title: string, path: string }>} One entry per written page.
 */
function writePages(pages, outputDir, basePrefix) {
  const taken = /* @__PURE__ */ new Set();
  // Returns `wanted` if free, otherwise the first free "-N" variant (N >= 2).
  const uniquePath = (wanted) => {
    if (!taken.has(wanted)) return wanted;
    const stem = wanted.replace(/\.md$/, "");
    let suffix = 2;
    while (taken.has(`${stem}-${suffix}.md`)) suffix += 1;
    return `${stem}-${suffix}.md`;
  };
  const entries = [];
  for (const { url, markdown, title, platform } of pages) {
    const relPath = uniquePath(filePathForPage(url, basePrefix));
    taken.add(relPath);
    writePage(markdown, join(outputDir, relPath), { sourceUrl: url, title, platform });
    entries.push({ title, path: relPath });
  }
  return entries;
}
796
+
797
+ // src/crawl/crawler.ts
798
+ import * as cheerio4 from "cheerio";
799
+ import * as readline from "readline";
800
+
801
+ // src/utils/url.ts
802
/**
 * Canonicalize a URL for de-duplication: drop the query string and fragment,
 * and remove a single trailing slash.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} Normalized URL string.
 * @throws {TypeError} When `url` cannot be parsed.
 */
function normalizeUrl(url) {
  const target = new URL(url);
  target.search = "";
  target.hash = "";
  const { href } = target;
  return href.endsWith("/") ? href.slice(0, -1) : href;
}
808
/**
 * Build a slug from a URL's hostname: dots become dashes and a leading
 * "www-" is dropped. Unparsable input yields "source".
 *
 * @param {string} url - URL to derive the slug from.
 * @returns {string}
 */
function slugFromUrl(url) {
  try {
    const { hostname } = new URL(url);
    const dashed = hostname.split(".").join("-");
    return dashed.replace(/^www-/, "");
  } catch {
    return "source";
  }
}
816
+
817
+ // src/crawl/boundary.ts
818
/**
 * Compute the initial crawl boundary for a start URL: its origin plus the
 * directory part of its path (the last path segment is dropped).
 *
 * @param {string} url - Start URL.
 * @returns {{ origin: string, pathPrefix: string }} `pathPrefix` always ends with "/".
 */
function getCrawlPrefix(url) {
  const { origin, pathname } = new URL(url);
  const directorySegments = pathname.split("/").slice(0, -1);
  return { origin, pathPrefix: `${directorySegments.join("/")}/` };
}
825
/**
 * Find the longest leading run of path segments shared by the start URL and
 * every navigation URL.
 *
 * @param {string} startUrl - URL the crawl started from.
 * @param {string[]} navUrls - URLs discovered in the navigation.
 * @returns {string} The shared path prefix, starting and ending with "/" ("/" when nothing is shared).
 */
function computeCommonPrefix(startUrl, navUrls) {
  const segmentsOf = (u) => new URL(u).pathname.split("/").filter(Boolean);
  let shared = segmentsOf(startUrl);
  for (const navUrl of navUrls) {
    const other = segmentsOf(navUrl);
    let keep = 0;
    while (keep < shared.length && keep < other.length && shared[keep] === other[keep]) {
      keep += 1;
    }
    shared = shared.slice(0, keep);
  }
  return shared.length > 0 ? `/${shared.join("/")}/` : "/";
}
838
/**
 * Check whether a URL lies inside the crawl boundary.
 *
 * @param {string} candidateUrl - URL to test.
 * @param {string} origin - Required origin.
 * @param {string} pathPrefix - Required path prefix.
 * @returns {boolean} False for out-of-bounds or unparsable URLs.
 */
function isInBounds(candidateUrl, origin, pathPrefix) {
  try {
    const candidate = new URL(candidateUrl);
    if (candidate.origin !== origin) return false;
    return candidate.pathname.startsWith(pathPrefix);
  } catch {
    return false;
  }
}
846
+
847
+ // src/crawl/crawler.ts
848
/**
 * Crawl pages breadth-first starting at `startUrl`, staying on the start
 * origin and under a path prefix.
 *
 * Options read by this function:
 *   - maxDepth: links are only followed from pages whose depth is below this.
 *   - navLinkSelector: selector used for link discovery; when set, the path
 *     prefix may be widened from the first page's navigation links.
 *   - discoverUrls: optional (html, baseUrl) => string[] replacing selector-based discovery.
 *   - onPageFetched: optional callback (url, fetchedCount, fetchedCount + queuedCount),
 *     invoked after every attempt, including failed ones.
 *
 * While running, a SIGINT handler is installed (and removed again in `finally`).
 *
 * @param {string} startUrl - URL to begin crawling from.
 * @param {object} options - See above.
 * @returns {Promise<{ pages: Array<{ url: string, html: string }>, effectivePrefix: string }>}
 */
async function crawl(startUrl, options) {
  const { origin } = getCrawlPrefix(startUrl);
  let { pathPrefix } = getCrawlPrefix(startUrl);
  const visited = /* @__PURE__ */ new Set();
  const results = [];
  let isFirstPage = true;
  let interrupted = false;
  let saveOnInterrupt = false;
  // First SIGINT: stop the loop and, if anything was collected, ask whether to
  // keep it. A second SIGINT while already interrupted exits with status 1.
  const onSigint = async () => {
    if (interrupted) {
      process.exit(1);
    }
    interrupted = true;
    if (results.length === 0) {
      process.exit(0);
    }
    // The prompt is written to stderr.
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stderr
    });
    const answer = await new Promise((resolve3) => {
      rl.question(
        `
Crawl interrupted. Save ${results.length} page(s) collected so far? (y/n) `,
        resolve3
      );
    });
    rl.close();
    saveOnInterrupt = answer.trim().toLowerCase().startsWith("y");
    if (!saveOnInterrupt) {
      process.exit(0);
    }
  };
  process.on("SIGINT", onSigint);
  // Queue entries are [url, depth] pairs; `visited` holds normalized URLs.
  const queue = [[startUrl, 0]];
  visited.add(normalizeUrl(startUrl));
  try {
    while (queue.length > 0 && !interrupted) {
      const [url, depth] = queue.shift();
      let html;
      try {
        html = await fetchPage(url);
      } catch {
        // A page that fails to fetch is reported and skipped; the crawl goes on.
        options.onPageFetched?.(url, results.length, results.length + queue.length);
        continue;
      }
      results.push({ url, html });
      options.onPageFetched?.(url, results.length, results.length + queue.length);
      if (depth < options.maxDepth) {
        if (isFirstPage) {
          // Only on the first successfully fetched page: recompute the path
          // prefix as the common prefix of the start URL and all same-origin
          // navigation URLs. This runs only when a navLinkSelector is set.
          const hasNavScope = !!options.navLinkSelector;
          if (hasNavScope) {
            const allNavUrls = options.discoverUrls ? discoverSameOriginCustom(html, url, origin, options.discoverUrls) : discoverSameOrigin(html, url, origin, options.navLinkSelector);
            if (allNavUrls.length > 0) {
              pathPrefix = computeCommonPrefix(startUrl, allNavUrls);
            }
          }
          isFirstPage = false;
        }
        const links = options.discoverUrls ? discoverLinksCustom(html, url, origin, pathPrefix, options.discoverUrls) : discoverLinks(html, url, origin, pathPrefix, options.navLinkSelector);
        for (const link of links) {
          const normalized = normalizeUrl(link);
          if (!visited.has(normalized)) {
            visited.add(normalized);
            queue.push([link, depth + 1]);
          }
        }
      }
      // Pause 200 ms between requests while more work is queued.
      if (queue.length > 0 && !interrupted) {
        await delay(200);
      }
    }
  } finally {
    process.off("SIGINT", onSigint);
  }
  return { pages: results, effectivePrefix: pathPrefix };
}
925
/**
 * Collect the in-bounds links of a page.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL of the page, used to resolve relative hrefs.
 * @param {string} origin - Required origin.
 * @param {string} pathPrefix - Required path prefix.
 * @param {string|null|undefined} navLinkSelector - Selector for links; all `a[href]` when falsy.
 * @returns {string[]} De-duplicated absolute URLs inside the crawl boundary.
 */
function discoverLinks(html, baseUrl, origin, pathPrefix, navLinkSelector) {
  const $ = cheerio4.load(html);
  const inBounds = /* @__PURE__ */ new Set();
  $(navLinkSelector || "a[href]").each((_, anchor) => {
    const href = $(anchor).attr("href");
    if (!href) return;
    let absolute;
    try {
      absolute = new URL(href, baseUrl).href;
    } catch {
      // Unresolvable hrefs are ignored.
      return;
    }
    if (isInBounds(absolute, origin, pathPrefix)) {
      inBounds.add(absolute);
    }
  });
  return [...inBounds];
}
942
/**
 * Collect every link of a page that stays on `origin`, regardless of path.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL of the page, used to resolve relative hrefs.
 * @param {string} origin - Required origin.
 * @param {string|null|undefined} navLinkSelector - Selector for links; all `a[href]` when falsy.
 * @returns {string[]} De-duplicated absolute same-origin URLs.
 */
function discoverSameOrigin(html, baseUrl, origin, navLinkSelector) {
  const $ = cheerio4.load(html);
  const sameOrigin = /* @__PURE__ */ new Set();
  $(navLinkSelector || "a[href]").each((_, anchor) => {
    const href = $(anchor).attr("href");
    if (!href) return;
    let target;
    try {
      target = new URL(href, baseUrl);
    } catch {
      // Unresolvable hrefs are ignored.
      return;
    }
    if (target.origin === origin) {
      sameOrigin.add(target.href);
    }
  });
  return [...sameOrigin];
}
959
/**
 * Run a custom discovery function and keep only URLs on `origin`.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL of the page.
 * @param {string} origin - Required origin.
 * @param {(html: string, baseUrl: string) => string[]} discoverUrls - Custom discovery.
 * @returns {string[]} De-duplicated same-origin URLs; unparsable ones are dropped.
 */
function discoverSameOriginCustom(html, baseUrl, origin, discoverUrls) {
  const sameOrigin = /* @__PURE__ */ new Set();
  for (const candidate of discoverUrls(html, baseUrl)) {
    try {
      if (new URL(candidate).origin === origin) {
        sameOrigin.add(candidate);
      }
    } catch {
      // Unparsable candidates are dropped.
    }
  }
  return [...sameOrigin];
}
973
/**
 * Run a custom discovery function and keep only URLs inside the crawl boundary.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL of the page.
 * @param {string} origin - Required origin.
 * @param {string} pathPrefix - Required path prefix.
 * @param {(html: string, baseUrl: string) => string[]} discoverUrls - Custom discovery.
 * @returns {string[]} De-duplicated in-bounds URLs.
 */
function discoverLinksCustom(html, baseUrl, origin, pathPrefix, discoverUrls) {
  const candidates = discoverUrls(html, baseUrl);
  const accepted = candidates.filter((candidate) => isInBounds(candidate, origin, pathPrefix));
  return Array.from(new Set(accepted));
}
977
/**
 * Resolve after `ms` milliseconds.
 *
 * @param {number} ms - Time to wait.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
980
+
981
+ // src/pipeline/manifest.ts
982
+ import { readFileSync, writeFileSync as writeFileSync2, mkdirSync as mkdirSync2 } from "fs";
983
+ import { join as join2 } from "path";
984
/**
 * Build the per-source manifest object (the content of `_index.json`).
 *
 * @param {string} name - Source name.
 * @param {string} url - Source start URL.
 * @param {string} platform - Detected platform id.
 * @param {Array<{ title: string, path: string }>} pages - Written page entries.
 * @param {object} [siteMeta] - Optional site metadata; when given, its fields
 *   and a `page_count` are added to the manifest.
 * @returns {object} Manifest stamped with the current time in `fetched_at`.
 */
function buildSourceManifest(name, url, platform, pages, siteMeta) {
  const manifest = {
    name,
    url,
    platform,
    fetched_at: (/* @__PURE__ */ new Date()).toISOString(),
    pages
  };
  if (!siteMeta) {
    return manifest;
  }
  return Object.assign(manifest, {
    display_name: siteMeta.displayName,
    description: siteMeta.description,
    icon_url: siteMeta.iconUrl,
    og_image: siteMeta.ogImage,
    language: siteMeta.language,
    page_count: pages.length
  });
}
1002
/**
 * Write a source manifest to `<outputDir>/_index.json` as pretty-printed JSON
 * with a trailing newline, creating the directory if needed.
 *
 * @param {object} manifest - Manifest to serialize.
 * @param {string} outputDir - Directory of the source.
 */
function writeSourceManifest(manifest, outputDir) {
  mkdirSync2(outputDir, { recursive: true });
  const target = join2(outputDir, "_index.json");
  const serialized = `${JSON.stringify(manifest, null, 2)}\n`;
  writeFileSync2(target, serialized, "utf-8");
}
1010
/**
 * Read `<rootDir>/manifest.json`.
 *
 * Always returns an object with a `sources` array: a missing, unreadable or
 * malformed file yields `{ sources: [] }`, and a parsable manifest without a
 * valid `sources` array keeps its other fields but gets an empty list, so
 * callers can safely search and append to `sources`.
 *
 * @param {string} rootDir - Directory holding the root manifest.
 * @returns {{ sources: object[] }} The root manifest.
 */
function loadRootManifest(rootDir) {
  let parsed;
  try {
    const raw = readFileSync(join2(rootDir, "manifest.json"), "utf-8");
    parsed = JSON.parse(raw);
  } catch {
    return { sources: [] };
  }
  // JSON such as `null`, `3` or `[]` is valid but is not a manifest object.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return { sources: [] };
  }
  if (!Array.isArray(parsed.sources)) {
    return { ...parsed, sources: [] };
  }
  return parsed;
}
1018
/**
 * Insert or replace a source entry (matched by `name`) in the root manifest
 * and write the manifest back to `<rootDir>/manifest.json`.
 *
 * @param {string} rootDir - Directory holding the root manifest.
 * @param {{ name: string }} entry - Source entry to upsert.
 */
function updateRootManifest(rootDir, entry) {
  const manifest = loadRootManifest(rootDir);
  const position = manifest.sources.findIndex((existing) => existing.name === entry.name);
  if (position === -1) {
    manifest.sources.push(entry);
  } else {
    manifest.sources[position] = entry;
  }
  mkdirSync2(rootDir, { recursive: true });
  const serialized = `${JSON.stringify(manifest, null, 2)}\n`;
  writeFileSync2(join2(rootDir, "manifest.json"), serialized, "utf-8");
}
1033
+
1034
+ // src/pipeline/meta-extractor.ts
1035
+ import * as cheerio5 from "cheerio";
1036
/**
 * Gather site-level metadata from a page's markup.
 *
 * @param {string} html - Page markup.
 * @param {string} url - Page URL; its origin is used to resolve icon/image URLs.
 * @returns {{ displayName: *, description: *, iconUrl: *, ogImage: *, language: * }}
 */
function extractSiteMeta(html, url) {
  const $ = cheerio5.load(html);
  const { origin } = new URL(url);
  const displayName = extractDisplayName($, url);
  const description = extractDescription($);
  const iconUrl = extractIconUrl($, origin);
  const ogImage = extractOgImage($, origin);
  const language = extractLanguage($);
  return { displayName, description, iconUrl, ogImage, language };
}
1047
/**
 * Trim a string and collapse "nothing there" to `undefined`.
 *
 * @param {string|null|undefined} value - Raw attribute or text value.
 * @returns {string|undefined} The trimmed string, or `undefined` when the
 *   input is nullish or contains only whitespace.
 */
function nonEmpty(value) {
  if (value == null) {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
1050
/**
 * Pick a human-readable site name.
 *
 * Preference order: `og:site_name` meta, `application-name` meta, the part of
 * `<title>` before the first " - ", " | " or " — " separator, and finally the
 * hostname of `url`.
 *
 * @param {Function} $ - Cheerio root for the page.
 * @param {string} url - Absolute page URL used for the hostname fallback.
 * @returns {string}
 */
function extractDisplayName($, url) {
  const metaSelectors = ['meta[property="og:site_name"]', 'meta[name="application-name"]'];
  for (const selector of metaSelectors) {
    const content = nonEmpty($(selector).attr("content"));
    if (content) {
      return content;
    }
  }
  const pageTitle = nonEmpty($("title").text());
  if (!pageTitle) {
    return new URL(url).hostname;
  }
  const [leading] = pageTitle.split(/\s[-|—]\s/);
  return leading.trim();
}
1062
/**
 * Read the site description, preferring `og:description` over the plain
 * `description` meta tag.
 *
 * @param {Function} $ - Cheerio root for the page.
 * @returns {string} The description, or an empty string when neither tag has
 *   non-blank content.
 */
function extractDescription($) {
  const fromOpenGraph = nonEmpty($('meta[property="og:description"]').attr("content"));
  // nonEmpty yields either a non-blank string or undefined, so `??` chains safely.
  return fromOpenGraph ?? nonEmpty($('meta[name="description"]').attr("content")) ?? "";
}
1069
/**
 * Locate the site icon.
 *
 * `<link>` selectors are tried in priority order (apple-touch-icon, SVG icon,
 * any icon, shortcut icon); the first one with a non-blank `href` wins and is
 * resolved against `origin`. Falls back to `<origin>/favicon.ico`.
 *
 * @param {Function} $ - Cheerio root for the page.
 * @param {string} origin - Origin of the page, e.g. `https://example.com`.
 * @returns {string} Icon URL.
 */
function extractIconUrl($, origin) {
  const prioritized = [
    'link[rel="apple-touch-icon"]',
    'link[rel="icon"][type="image/svg+xml"]',
    'link[rel="icon"]',
    'link[rel="shortcut icon"]'
  ];
  let found;
  // `some` stops at the first selector that yields a usable href.
  prioritized.some((selector) => {
    const href = nonEmpty($(selector).attr("href"));
    if (!href) {
      return false;
    }
    found = resolveUrl(href, origin);
    return true;
  });
  return found === undefined ? `${origin}/favicon.ico` : found;
}
1082
/**
 * Read the `og:image` meta tag.
 *
 * @param {Function} $ - Cheerio root for the page.
 * @param {string} origin - Origin used to resolve the image URL.
 * @returns {string|null} Resolved image URL, or `null` when the tag is
 *   missing or blank.
 */
function extractOgImage($, origin) {
  const content = nonEmpty($('meta[property="og:image"]').attr("content"));
  return content ? resolveUrl(content, origin) : null;
}
1087
/**
 * Determine the page language from `<html lang>` or, failing that, the
 * `og:locale` meta tag.
 *
 * @param {Function} $ - Cheerio root for the page.
 * @returns {string|null} Language/locale string, or `null` if neither is set.
 */
function extractLanguage($) {
  const declared = nonEmpty($("html").attr("lang"));
  // nonEmpty never returns an empty string, so nullish chaining matches truthiness.
  return declared ?? nonEmpty($('meta[property="og:locale"]').attr("content")) ?? null;
}
1094
/**
 * Turn an `href` found in a page into an absolute URL string.
 *
 * - Anything that already starts with a URI scheme (`https:`, `http:`,
 *   `data:`, ...; matched case-insensitively) is returned unchanged.
 *   Previously only lowercase `http://` / `https://` were recognized, so
 *   inline `data:` icons were mangled into `<origin>/data:...`.
 * - Protocol-relative references (`//host/path`) are given the `https:` scheme.
 * - Everything else is appended to `origin`, inserting a `/` when needed.
 *
 * @param {string} href - Raw reference from the document.
 * @param {string} origin - Page origin without trailing slash.
 * @returns {string} Absolute URL.
 */
function resolveUrl(href, origin) {
  // RFC 3986 scheme: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) followed by ":".
  if (/^[a-z][a-z0-9+.-]*:/i.test(href)) return href;
  if (href.startsWith("//")) return `https:${href}`;
  if (href.startsWith("/")) return `${origin}${href}`;
  return `${origin}/${href}`;
}
1100
+
1101
+ // src/commands/fetch.ts
1102
/**
 * Decide whether a fetch writes one Markdown file or a directory of pages.
 *
 * Single-file mode is used when not crawling, or when `output` ends in `.md`.
 * Otherwise directory mode is used, targeting `output` (with a trailing slash
 * ensured) or `.ai/docs/<name>/` when no output was given.
 *
 * @param {string|undefined} output - Value of the output option, if any.
 * @param {boolean} shouldCrawl - Whether crawl mode is enabled.
 * @param {string} name - Source name used for the default directory.
 * @returns {{mode: string, outputPath: (string|undefined), outputDir: string}}
 */
function resolveOutputMode(output, shouldCrawl, name) {
  const wantsSingleFile = !shouldCrawl || Boolean(output && output.endsWith(".md"));
  if (wantsSingleFile) {
    return { mode: "single-file", outputPath: output, outputDir: "" };
  }
  let targetDir = `.ai/docs/${name}/`;
  if (output) {
    targetDir = output.endsWith("/") ? output : `${output}/`;
  }
  return { mode: "directory", outputPath: undefined, outputDir: targetDir };
}
1115
/**
 * `fetch` command: convert one documentation URL (or, with `--crawl`, the
 * pages reachable from it) into Markdown.
 *
 * Output shape is decided by `resolveOutputMode`:
 * - directory mode writes one file per page plus `_index.json`, and registers
 *   the source in the parent directory's `manifest.json`;
 * - single-file mode passes the combined Markdown to `write`.
 */
var fetchCommand = defineCommand({
  meta: {
    name: "fetch",
    description: "Fetch a documentation URL and convert to Markdown"
  },
  args: {
    url: {
      type: "positional",
      description: "Documentation URL to convert",
      required: true
    },
    output: {
      type: "string",
      alias: "o",
      description: "Output file path or directory"
    },
    name: {
      type: "string",
      description: "Name for this source (auto-derived from hostname if omitted)"
    },
    crawl: {
      type: "boolean",
      description: "Follow sidebar/nav links",
      default: false
    },
    "max-depth": {
      type: "string",
      description: "Maximum crawl depth",
      default: "2"
    }
  },
  async run({ args }) {
    const url = args.url;
    const output = args.output;
    const shouldCrawl = args.crawl;
    const maxDepth = parseInt(args["max-depth"], 10);
    const name = args.name || slugFromUrl(url);
    const { mode, outputPath, outputDir } = resolveOutputMode(output, shouldCrawl, name);
    // Single-file mode without an output path: progress logging is suppressed.
    const silent = mode === "single-file" && !outputPath;
    if (shouldCrawl) {
      // Reject a non-numeric or negative depth instead of handing NaN to the crawler.
      if (Number.isNaN(maxDepth) || maxDepth < 0) {
        consola2.error(`Invalid --max-depth "${args["max-depth"]}": expected a non-negative integer.`);
        process.exit(1);
      }
      if (!silent) consola2.start(`Crawling from ${url} (max depth: ${maxDepth})...`);
      // The first page is fetched up front to pick the platform strategy and,
      // in directory mode, to extract site metadata.
      const firstHtml = await fetchPage(url);
      const $ = cheerio6.load(firstHtml);
      const platformId = resolve(url, $);
      const strategy = getStrategy(platformId);
      const navLinkSelector = strategy.navLinkSelector();
      const crawlResult = await crawl(url, {
        maxDepth,
        navLinkSelector,
        discoverUrls: strategy.discoverUrls?.bind(strategy),
        onPageFetched: (pageUrl, current, total) => {
          if (!silent) consola2.info(`[${current}/${total}] ${pageUrl}`);
        }
      });
      const { pages, effectivePrefix } = crawlResult;
      if (!silent) consola2.success(`Crawled ${pages.length} pages`);
      if (mode === "directory") {
        const pageEntries = pages.map((page) => {
          const { content, title, platform } = extract(page.html, page.url);
          const md = transform(content);
          return { url: page.url, title, platform, markdown: md };
        });
        const firstPlatform = pageEntries[0]?.platform || "generic";
        const manifestPages = writePages(pageEntries, outputDir, effectivePrefix);
        const siteMeta = extractSiteMeta(firstHtml, url);
        const sourceManifest = buildSourceManifest(name, url, firstPlatform, manifestPages, siteMeta);
        writeSourceManifest(sourceManifest, outputDir);
        const sourceDirPath = outputDir.replace(/\/$/, "");
        const rootDir = dirname3(sourceDirPath);
        // The root manifest lives in the parent of the source directory, so the
        // entry's path must be the directory actually written to. That equals
        // `name` only for the default location, not for a custom --output.
        const sourceDirName = sourceDirPath.slice(sourceDirPath.lastIndexOf("/") + 1);
        updateRootManifest(rootDir, {
          name,
          path: `${sourceDirName}/`,
          fetched_at: sourceManifest.fetched_at,
          display_name: siteMeta.displayName,
          description: siteMeta.description,
          icon_url: siteMeta.iconUrl,
          page_count: manifestPages.length
        });
        consola2.success(`Written ${pages.length} pages to ${outputDir}`);
      } else {
        // Single-file crawl: concatenate every page as its own "##" section.
        const sections = [];
        let firstTitle = "";
        let firstPlatform = "";
        for (const page of pages) {
          const { content, title, platform } = extract(page.html, page.url);
          if (!firstTitle) {
            firstTitle = title;
            firstPlatform = platform;
          }
          const md = transform(content);
          sections.push(`## ${title}\n\nSource: ${page.url}\n\n${md}`);
        }
        const markdown = sections.join("\n\n---\n\n");
        write(markdown, outputPath, {
          sourceUrl: url,
          title: firstTitle,
          platform: firstPlatform
        });
        if (!silent) consola2.success(`Written to ${outputPath}`);
      }
    } else {
      if (!silent) consola2.start(`Fetching ${url}...`);
      let html = await fetchPage(url);
      const { content, title, platform } = extract(html, url);
      // Fewer than 200 characters of extracted content is treated as a page
      // that may need a browser to render; retry once via fetchWithBrowser.
      if (content.trim().length < 200) {
        if (!silent) consola2.warn("Content looks thin, retrying with browser...");
        try {
          html = await fetchWithBrowser(url);
          const result = extract(html, url);
          const markdown2 = transform(result.content);
          write(markdown2, outputPath, {
            sourceUrl: url,
            title: result.title || title,
            platform: result.platform
          });
          if (!silent) consola2.success(`Written to ${outputPath}`);
          return;
        } catch (err) {
          // Best-effort fallback: on any failure continue with the static content.
          if (err?.code === "ERR_PLAYWRIGHT_NOT_INSTALLED") {
            consola2.warn(
              "This page may require a browser to render. Install Playwright:\n npm install -D playwright && npx playwright install chromium"
            );
          } else {
            consola2.warn("Browser fallback failed, using static content.");
          }
        }
      }
      if (!silent) consola2.success(`Extracted content (platform: ${platform})`);
      const markdown = transform(content);
      write(markdown, outputPath, {
        sourceUrl: url,
        title,
        platform
      });
      if (!silent) consola2.success(`Written to ${outputPath}`);
    }
  }
});
1256
+
1257
+ // src/commands/add.ts
1258
+ import { defineCommand as defineCommand2 } from "citty";
1259
+ import { join as join4 } from "path";
1260
+ import consola3 from "consola";
1261
+
1262
+ // src/config/manager.ts
1263
+ import { readFileSync as readFileSync2, writeFileSync as writeFileSync3, existsSync } from "fs";
1264
+ import { join as join3, dirname as dirname4 } from "path";
1265
+ import yaml from "js-yaml";
1266
+ var CONFIG_FILENAME = ".docs2ai.yaml"; // Config file name that findConfigFile looks for in each directory it checks.
1267
/**
 * Locate and load the nearest config file.
 *
 * The search starts at `startDir` (or the current working directory) and is
 * delegated to `findConfigFile`. Missing keys get defaults: `version` 1,
 * `output_dir` ".ai/docs", `sources` [].
 *
 * @param {string} [startDir] - Directory to start searching from.
 * @returns {{config: {version: number, outputDir: string, sources: Array},
 *   configPath: string} | null} `null` when no config file is found.
 */
function loadConfig(startDir) {
  const configPath = findConfigFile(startDir || process.cwd());
  if (!configPath) return null;
  const raw = readFileSync2(configPath, "utf-8");
  // An empty YAML document loads as undefined; treat it as an empty mapping so
  // the defaults below apply instead of throwing on property access.
  const data = yaml.load(raw) ?? {};
  const config = {
    version: data.version ?? 1,
    outputDir: data.output_dir ?? ".ai/docs",
    sources: (data.sources ?? []).map(snakeToCamelSource)
  };
  return { config, configPath };
}
1279
/**
 * Serialize the in-memory config to YAML and write it to `configPath`.
 *
 * Keys are converted to the snake_case on-disk form. `lineWidth: -1` is
 * passed to `yaml.dump`.
 *
 * @param {{version: number, outputDir: string, sources: Array}} config
 * @param {string} configPath - Destination file path.
 * @returns {void}
 */
function saveConfig(config, configPath) {
  const { version, outputDir, sources } = config;
  const onDisk = {
    version,
    output_dir: outputDir,
    sources: sources.map(camelToSnakeSource)
  };
  writeFileSync3(configPath, yaml.dump(onDisk, { lineWidth: -1 }), "utf-8");
}
1288
/**
 * Add `source` to `config.sources`, replacing any existing entry with the
 * same `name`. Mutates `config` in place.
 *
 * @param {{sources: Array}} config - Config object to modify.
 * @param {{name: string}} source - Source to insert or overwrite.
 * @returns {void}
 */
function addSource(config, source) {
  const { sources } = config;
  const existingAt = sources.findIndex((candidate) => candidate.name === source.name);
  if (existingAt < 0) {
    sources.push(source);
    return;
  }
  sources[existingAt] = source;
}
1296
/**
 * Walk from `startDir` up through its ancestors looking for the config file.
 *
 * @param {string} startDir - Directory to start from.
 * @returns {string|null} Path of the first config file found, or `null` once
 *   the top of the directory tree is reached without a match.
 */
function findConfigFile(startDir) {
  let current = startDir;
  let previous;
  do {
    const candidate = join3(current, CONFIG_FILENAME);
    if (existsSync(candidate)) {
      return candidate;
    }
    previous = current;
    current = dirname4(current);
    // dirname of the top-most directory is itself; that ends the walk.
  } while (current !== previous);
  return null;
}
1306
/**
 * Convert one on-disk (snake_case) source entry to its in-memory form,
 * filling in defaults for nullish fields.
 *
 * @param {object} s - Raw source entry from the config file.
 * @returns {{name: string, url: string, crawl: boolean, maxDepth: number,
 *   output: string}}
 */
function snakeToCamelSource(s) {
  const { name, url, crawl, max_depth: depth, output } = s;
  // `??` (not destructuring defaults) so that explicit nulls also fall back.
  return {
    name: name ?? "",
    url: url ?? "",
    crawl: crawl ?? false,
    maxDepth: depth ?? 2,
    output: output ?? ""
  };
}
1315
/**
 * Convert one in-memory source entry to the snake_case shape written to the
 * config file. Only the five known fields are carried over.
 *
 * @param {{name: string, url: string, crawl: boolean, maxDepth: number,
 *   output: string}} s - In-memory source entry.
 * @returns {{name: string, url: string, crawl: boolean, max_depth: number,
 *   output: string}}
 */
function camelToSnakeSource(s) {
  const { name, url, crawl, maxDepth, output } = s;
  return { name, url, crawl, max_depth: maxDepth, output };
}
1324
+
1325
+ // src/commands/add.ts
1326
/**
 * `add` command: register a documentation source in `.docs2ai.yaml`.
 *
 * Uses the config file found by `loadConfig`, or creates a new one in the
 * current working directory. A source with the same name is overwritten.
 */
var addCommand = defineCommand2({
  meta: {
    name: "add",
    description: "Add a documentation source to .docs2ai.yaml"
  },
  args: {
    url: {
      type: "positional",
      description: "Documentation URL to add",
      required: true
    },
    name: {
      type: "string",
      description: "Name for this source (auto-derived from hostname if omitted)"
    },
    crawl: {
      type: "boolean",
      description: "Enable crawl mode for this source",
      default: false
    },
    "max-depth": {
      type: "string",
      description: "Maximum crawl depth",
      default: "2"
    },
    output: {
      type: "string",
      alias: "o",
      description: "Output filename or directory"
    }
  },
  run({ args }) {
    const url = args.url;
    const shouldCrawl = args.crawl;
    const maxDepth = parseInt(args["max-depth"], 10);
    // Refuse to persist NaN or a negative depth into the config file.
    if (Number.isNaN(maxDepth) || maxDepth < 0) {
      consola3.error(`Invalid --max-depth "${args["max-depth"]}": expected a non-negative integer.`);
      process.exit(1);
    }
    const name = args.name || slugFromUrl(url);
    // Default output: a directory for crawled sources, a single .md file otherwise.
    const output = args.output || (shouldCrawl ? `${name}/` : `${name}.md`);
    const existing = loadConfig();
    let config;
    let configPath;
    if (existing) {
      config = existing.config;
      configPath = existing.configPath;
    } else {
      configPath = join4(process.cwd(), ".docs2ai.yaml");
      config = { version: 1, outputDir: ".ai/docs", sources: [] };
    }
    addSource(config, { name, url, crawl: shouldCrawl, maxDepth, output });
    saveConfig(config, configPath);
    consola3.success(`Added source "${name}" \u2192 ${url}`);
    consola3.info(`Config: ${configPath}`);
  }
});
1379
+
1380
+ // src/commands/update.ts
1381
+ import { defineCommand as defineCommand3 } from "citty";
1382
+ import { join as join5, dirname as dirname5 } from "path";
1383
+ import { mkdirSync as mkdirSync3 } from "fs";
1384
+ import * as cheerio7 from "cheerio";
1385
+ import consola4 from "consola";
1386
/**
 * `update` command: re-fetch the sources listed in the config file (all of
 * them, or only the one selected with `--name`) and rewrite their output
 * under `<config dir>/<outputDir>/<source.output>`.
 */
var updateCommand = defineCommand3({
  meta: {
    name: "update",
    description: "Refresh configured documentation sources"
  },
  args: {
    name: {
      type: "string",
      description: "Update only the named source"
    }
  },
  async run({ args }) {
    const loaded = loadConfig();
    if (!loaded) {
      consola4.error("No .docs2ai.yaml found. Run `docs2ai add <url>` first.");
      process.exit(1);
    }
    const { config, configPath } = loaded;
    const configDir = dirname5(configPath);
    const requestedName = args.name;
    const selected = requestedName
      ? config.sources.filter((candidate) => candidate.name === requestedName)
      : config.sources;
    if (selected.length === 0) {
      const reason = requestedName
        ? `Source "${requestedName}" not found in config.`
        : "No sources configured.";
      consola4.error(reason);
      process.exit(1);
    }
    for (const source of selected) {
      // Any output not ending in ".md" is treated as a directory target.
      const writesDirectory = !source.output.endsWith(".md");
      consola4.start(`Updating "${source.name}" from ${source.url}...`);

      // Single page: fetch, convert, write one file.
      if (!source.crawl) {
        const filePath = join5(configDir, config.outputDir, source.output);
        mkdirSync3(dirname5(filePath), { recursive: true });
        const pageHtml = await fetchPage(source.url);
        const { content, title, platform } = extract(pageHtml, source.url);
        const body = transform(content);
        write(body, filePath, { sourceUrl: source.url, title, platform });
        consola4.success(`Updated "${source.name}" \u2192 ${filePath}`);
        continue;
      }

      // Crawl: the landing page selects the platform strategy.
      const landingHtml = await fetchPage(source.url);
      const $ = cheerio7.load(landingHtml);
      const strategy = getStrategy(resolve(source.url, $));
      const { pages, effectivePrefix } = await crawl(source.url, {
        maxDepth: source.maxDepth,
        navLinkSelector: strategy.navLinkSelector(),
        discoverUrls: strategy.discoverUrls?.bind(strategy),
        onPageFetched: (pageUrl, current, total) => {
          consola4.info(` [${current}/${total}] ${pageUrl}`);
        }
      });

      if (writesDirectory) {
        const outputDir = join5(configDir, config.outputDir, source.output);
        const pageEntries = pages.map((page) => {
          const extracted = extract(page.html, page.url);
          const markdown = transform(extracted.content);
          return {
            url: page.url,
            title: extracted.title,
            platform: extracted.platform,
            markdown
          };
        });
        const firstPlatform = pageEntries[0]?.platform || "generic";
        const manifestPages = writePages(pageEntries, outputDir, effectivePrefix);
        const siteMeta = extractSiteMeta(landingHtml, source.url);
        const sourceManifest = buildSourceManifest(
          source.name,
          source.url,
          firstPlatform,
          manifestPages,
          siteMeta
        );
        writeSourceManifest(sourceManifest, outputDir);
        updateRootManifest(join5(configDir, config.outputDir), {
          name: source.name,
          path: source.output,
          fetched_at: sourceManifest.fetched_at,
          display_name: siteMeta.displayName,
          description: siteMeta.description,
          icon_url: siteMeta.iconUrl,
          page_count: manifestPages.length
        });
        consola4.success(`Updated "${source.name}" \u2192 ${outputDir} (${pages.length} pages)`);
        continue;
      }

      // Crawl into a single Markdown file: one "##" section per page.
      const combinedPath = join5(configDir, config.outputDir, source.output);
      mkdirSync3(dirname5(combinedPath), { recursive: true });
      const sections = [];
      let leadTitle = "";
      let leadPlatform = "";
      for (const page of pages) {
        const { content, title, platform } = extract(page.html, page.url);
        if (!leadTitle) {
          leadTitle = title;
          leadPlatform = platform;
        }
        const pageMarkdown = transform(content);
        sections.push(`## ${title}\n\nSource: ${page.url}\n\n${pageMarkdown}`);
      }
      write(sections.join("\n\n---\n\n"), combinedPath, {
        sourceUrl: source.url,
        title: leadTitle,
        platform: leadPlatform
      });
      consola4.success(`Updated "${source.name}" \u2192 ${combinedPath}`);
    }
  }
});
1504
+
1505
+ // src/commands/list.ts
1506
+ import { defineCommand as defineCommand4 } from "citty";
1507
+ import consola5 from "consola";
1508
/**
 * `list` command: print the config file location, the output directory and
 * every configured source (name, crawl settings, URL, output target).
 */
var listCommand = defineCommand4({
  meta: {
    name: "list",
    description: "List configured documentation sources"
  },
  run() {
    const loaded = loadConfig();
    if (!loaded) {
      consola5.info("No .docs2ai.yaml found. Run `docs2ai add <url>` to get started.");
      return;
    }
    const { config, configPath } = loaded;
    consola5.info(`Config: ${configPath}`);
    consola5.info(`Output dir: ${config.outputDir}\n`);
    if (config.sources.length === 0) {
      consola5.info("No sources configured.");
      return;
    }
    config.sources.forEach((source) => {
      // The crawl suffix is shown only for sources with crawl enabled.
      let crawlSuffix = "";
      if (source.crawl) {
        crawlSuffix = ` (crawl, depth: ${source.maxDepth})`;
      }
      console.log(` ${source.name}${crawlSuffix}`);
      console.log(` URL: ${source.url}`);
      console.log(` Output: ${source.output}`);
      console.log();
    });
  }
});
1536
+
1537
+ // src/commands/serve.ts
1538
+ import { defineCommand as defineCommand5 } from "citty";
1539
+ import { resolve as resolve2 } from "path";
1540
/**
 * `serve` command: create the MCP server for a documentation directory and
 * connect it to a stdio transport. The server module and the SDK transport
 * are loaded lazily, only when the command runs.
 */
var serveCommand = defineCommand5({
  meta: {
    name: "serve",
    description: "Start an MCP server exposing documentation tools"
  },
  args: {
    dir: {
      type: "string",
      alias: "d",
      description: "Documentation directory to serve",
      default: ".ai/docs/"
    }
  },
  async run({ args }) {
    // Resolve the docs directory relative to the current working directory.
    const docsDir = resolve2(process.cwd(), args.dir);
    const { createMcpServer: buildServer } = await Promise.resolve().then(() => (init_server(), server_exports));
    const { StdioServerTransport: Transport } = await import("@modelcontextprotocol/sdk/server/stdio.js");
    const mcpServer = buildServer(docsDir);
    const stdio = new Transport();
    await mcpServer.connect(stdio);
  }
});
1562
+
1563
+ // src/cli.ts
1564
// Last-resort handlers: print the error message (or the raw value when there
// is no message) and exit with status 1. Thrown/rejected values may be
// null or undefined, so both handlers use optional chaining.
process.on("uncaughtException", (err) => {
  if (err?.code === "ERR_PLAYWRIGHT_NOT_INSTALLED") {
    consola6.error(err.message);
  } else {
    consola6.error(err?.message || err);
  }
  process.exit(1);
});
process.on("unhandledRejection", (err) => {
  if (err?.code === "ERR_PLAYWRIGHT_NOT_INSTALLED") {
    consola6.error(err.message);
  } else {
    consola6.error(err?.message || err);
  }
  process.exit(1);
});
var subCommands = {
  add: addCommand,
  update: updateCommand,
  list: listCommand,
  serve: serveCommand
};
var firstArg = process.argv[2];
// Own-property check: the `in` operator would also match inherited names such
// as "constructor" or "toString" and misroute them as sub-commands.
var isSubCommand = typeof firstArg === "string" && Object.prototype.hasOwnProperty.call(subCommands, firstArg);
if (isSubCommand) {
  // Known sub-command: dispatch through a main command that has no `run`.
  const main = defineCommand6({
    meta: {
      name: "docs2ai",
      version: "0.1.0",
      description: "Convert documentation URLs into AI-ready Markdown files"
    },
    subCommands
  });
  runMain(main);
} else if (firstArg && !firstArg.startsWith("-")) {
  // Any other non-flag first argument is treated as a URL for the fetch command.
  runCommand(fetchCommand, { rawArgs: process.argv.slice(2) });
} else {
  // No arguments, or only flags: show the short usage text (or citty's flag handling).
  const main = defineCommand6({
    meta: {
      name: "docs2ai",
      version: "0.1.0",
      description: "Convert documentation URLs into AI-ready Markdown files"
    },
    subCommands,
    run() {
      console.log("Usage: docs2ai <url> [-o output.md] [--crawl]");
      console.log(" docs2ai add <url> [--name name] [--crawl]");
      console.log(" docs2ai update [--name name]");
      console.log(" docs2ai list");
      console.log(" docs2ai serve [-d dir]");
      console.log("\nRun `docs2ai --help` for full usage.");
    }
  });
  runMain(main);
}
1619
+ //# sourceMappingURL=cli.mjs.map