@dynamik-dev/refdocs 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,222 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import { join, dirname, extname } from "node:path";
3
+ import * as cheerio from "cheerio";
4
+ import { Readability } from "@mozilla/readability";
5
+ import TurndownService from "turndown";
6
+ import { JSDOM } from "jsdom";
7
/** Defaults that crawlSite merges underneath the caller-supplied options. */
const DEFAULT_CRAWL_OPTIONS = {
    // Upper bound on the number of pages written during one crawl.
    maxPages: 200,
    // Links are followed only from pages whose depth is below this value
    // (the start page is depth 0).
    depth: 3,
    // Pause between requests, in milliseconds.
    delayMs: 150,
};
// User-Agent header sent with every request made by this module.
// Keep the version suffix in sync with the released package version.
const USER_AGENT = "refdocs-cli/0.5.0";
13
/**
 * Returns a promise that settles (with no value) once a timer of `ms`
 * milliseconds has fired.
 *
 * @param {number} ms - Timer duration in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
16
/**
 * Canonicalizes a URL so that equivalent page addresses compare equal:
 * the fragment is dropped and one trailing slash is removed from any
 * non-root path. The query string is left untouched.
 *
 * @param {string} raw - URL to normalize.
 * @returns {string} The normalized URL, or `raw` unchanged when it cannot
 *   be parsed as a URL.
 */
function normalizeUrl(raw) {
    let parsed;
    try {
        parsed = new URL(raw);
    }
    catch {
        // Not a parsable absolute URL: hand the input back untouched.
        return raw;
    }
    parsed.hash = "";
    const { pathname } = parsed;
    // "/" itself must keep its slash; only longer paths are trimmed.
    const hasRemovableSlash = pathname.length > 1 && pathname.endsWith("/");
    if (hasRemovableSlash) {
        parsed.pathname = pathname.slice(0, -1);
    }
    return parsed.toString();
}
30
/**
 * Tells whether `candidate` lives at or below `scope`: both URLs must share
 * an origin, and the candidate path must either equal the scope path or sit
 * underneath it on a path-segment boundary (so "/docs-old" is not inside
 * "/docs").
 *
 * @param {string} candidate - Absolute URL being tested.
 * @param {string} scope - Absolute URL that defines the allowed subtree.
 * @returns {boolean}
 * @throws {TypeError} If either argument is not a parsable absolute URL.
 */
function isSubPath(candidate, scope) {
    const { origin: candidateOrigin, pathname: candidatePath } = new URL(candidate);
    const { origin: scopeOrigin, pathname: scopeRoot } = new URL(scope);
    if (candidateOrigin !== scopeOrigin) {
        return false;
    }
    if (candidatePath === scopeRoot) {
        return true;
    }
    // Compare against the scope path with a trailing slash so that only
    // whole path segments count as a match.
    const prefix = scopeRoot.endsWith("/") ? scopeRoot : `${scopeRoot}/`;
    return candidatePath.startsWith(prefix);
}
41
/**
 * Maps a page URL to the relative path of the Markdown file that will hold
 * its converted content.
 *
 * - A path ending in "/" becomes ".../index.md".
 * - ".html" / ".htm" (any letter case) is replaced by ".md".
 * - A path already ending in ".md" is kept as is.
 * - Any other path gets ".md" appended, including paths whose last segment
 *   merely contains a dot (for example "/docs/v1.2" or "/page.php"), so the
 *   result always carries a Markdown extension.
 *
 * @param {string} pageUrl - Absolute URL of the page.
 * @returns {string} Relative file path without a leading slash.
 * @throws {TypeError} If `pageUrl` is not a parsable absolute URL.
 */
function urlToFilePath(pageUrl) {
    const { pathname } = new URL(pageUrl);
    // Directory-style URLs are stored as an index file inside that directory.
    let path = pathname.endsWith("/") ? `${pathname}index` : pathname;
    // Strip the leading slash so the result is relative to the output dir.
    path = path.replace(/^\//, "");
    if (path === "") {
        path = "index";
    }
    const ext = extname(path).toLowerCase();
    if (ext === ".md") {
        return path;
    }
    if (ext === ".html" || ext === ".htm") {
        path = path.slice(0, -ext.length);
    }
    return `${path}.md`;
}
61
/**
 * Converts an HTML page to Markdown.
 *
 * The page is parsed with JSDOM and run through Readability; when
 * Readability yields content, only that content is converted and the
 * extracted title (if any) is prepended as a level-1 heading. Otherwise the
 * raw `html` string is converted as a whole.
 *
 * @param {string} html - Page markup.
 * @param {string} url - URL given to JSDOM as the document URL.
 * @returns {string} Markdown text.
 */
function htmlToMarkdown(html, url) {
    const dom = new JSDOM(html, { url });
    const turndown = new TurndownService({
        headingStyle: "atx",
        codeBlockStyle: "fenced",
        bulletListMarker: "-",
    });
    let article;
    try {
        article = new Readability(dom.window.document).parse();
    }
    finally {
        // The parse result is consumed below only through string fields
        // (content, title), so the window can be released right away
        // instead of lingering for every crawled page.
        dom.window.close();
    }
    if (!article?.content) {
        // Fallback: Readability found nothing, convert the full markup.
        return turndown.turndown(html);
    }
    const body = turndown.turndown(article.content);
    // Prepend the title as an h1 if Readability extracted one.
    return article.title ? `# ${article.title}\n\n${body}` : body;
}
81
/**
 * Collects the crawlable links found in a page.
 *
 * Every `<a href>` is resolved against `pageUrl`, normalized, and kept only
 * when it lies inside `scope` and its path has no extension or an
 * ".html"/".htm" extension. Fragment-only, `javascript:` and `mailto:`
 * hrefs are ignored, as are hrefs that cannot be resolved to a URL.
 *
 * @param {string} html - Page markup to scan.
 * @param {string} pageUrl - URL of the page, used as the base for relative links.
 * @param {string} scope - URL delimiting the subtree that may be crawled.
 * @returns {string[]} Unique normalized URLs in document order.
 */
function discoverLinks(html, pageUrl, scope) {
    const $ = cheerio.load(html);
    const base = new URL(pageUrl);
    const ignoredPrefixes = ["#", "javascript:", "mailto:"];
    const pageExtensions = new Set(["", ".html", ".htm"]);
    // A Set keeps first-seen order while discarding duplicates.
    const found = new Set();
    for (const anchor of $("a[href]").toArray()) {
        const href = $(anchor).attr("href");
        if (!href || ignoredPrefixes.some((prefix) => href.startsWith(prefix))) {
            continue;
        }
        try {
            const target = normalizeUrl(new URL(href, base).toString());
            // Only follow links within scope and same origin.
            if (!isSubPath(target, scope)) {
                continue;
            }
            // Anything with a non-page extension (images, archives, ...) is skipped.
            const ext = extname(new URL(target).pathname).toLowerCase();
            if (pageExtensions.has(ext)) {
                found.add(target);
            }
        }
        catch {
            // Invalid URL, skip
        }
    }
    return [...found];
}
111
/**
 * Downloads a page and returns its body together with the reported
 * content type.
 *
 * @param {string} url - Address to request; redirects are followed.
 * @returns {Promise<{html: string, contentType: string}>} Response body as
 *   text and the `content-type` header ("" when the header is absent).
 * @throws {Error} When the response status is not OK.
 */
async function fetchPage(url) {
    const requestInit = {
        redirect: "follow",
        headers: {
            "User-Agent": USER_AGENT,
            Accept: "text/html,text/plain,text/markdown,*/*",
        },
    };
    const response = await fetch(url, requestInit);
    if (response.ok) {
        const contentType = response.headers.get("content-type") ?? "";
        const html = await response.text();
        return { html, contentType };
    }
    throw new Error(`HTTP ${response.status} fetching ${url}`);
}
126
/**
 * Reports whether a URL's path ends in ".txt" or ".md" (case-insensitive).
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} `false` when the URL cannot be parsed.
 */
export function isTextFileUrl(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname.toLowerCase();
    }
    catch {
        return false;
    }
    return [".txt", ".md"].some((suffix) => pathname.endsWith(suffix));
}
135
/**
 * Reports whether a URL's hostname is exactly "github.com".
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} `false` for any other host and for unparsable input.
 */
export function isGitHubUrl(url) {
    let host;
    try {
        ({ hostname: host } = new URL(url));
    }
    catch {
        return false;
    }
    return host === "github.com";
}
143
/**
 * Builds the default storage path for a single downloaded file:
 * "ref-docs/<hostname>/<url path>", using "index" when the URL path is
 * just "/".
 *
 * @param {string} url - Absolute URL of the file.
 * @returns {string} Relative storage path.
 * @throws {TypeError} If `url` is not a parsable absolute URL.
 */
export function deriveLocalPath(url) {
    const { hostname, pathname } = new URL(url);
    // Drop exactly one leading slash; everything else is kept verbatim.
    const relative = pathname.startsWith("/") ? pathname.slice(1) : pathname;
    return `ref-docs/${hostname}/${relative || "index"}`;
}
151
/**
 * Builds the default output directory for a crawl:
 * "ref-docs/<hostname>/<url path>" with the leading slash and any trailing
 * slashes removed, using "root" when nothing is left of the path.
 *
 * @param {string} url - Absolute start URL of the crawl.
 * @returns {string} Relative directory path.
 * @throws {TypeError} If `url` is not a parsable absolute URL.
 */
export function deriveCrawlDir(url) {
    const { hostname, pathname } = new URL(url);
    const trimmed = pathname.replace(/^\//, "").replace(/\/+$/, "");
    const segment = trimmed === "" ? "root" : trimmed;
    return ["ref-docs", hostname, segment].join("/");
}
159
/**
 * Downloads one file and returns its text together with the reported
 * content type.
 *
 * @param {string} url - Address to request; redirects are followed.
 * @returns {Promise<{content: string, contentType: string}>} Response body
 *   as text and the `content-type` header ("" when the header is absent).
 * @throws {Error} When the response status is not OK.
 */
export async function fetchSingleFile(url) {
    const response = await fetch(url, {
        redirect: "follow",
        headers: { "User-Agent": USER_AGENT },
    });
    if (response.ok) {
        const contentType = response.headers.get("content-type") ?? "";
        const content = await response.text();
        return { content, contentType };
    }
    throw new Error(`HTTP ${response.status} fetching ${url}`);
}
171
/**
 * Crawls a site breadth-first from `startUrl`, converts every HTML page it
 * fetches to Markdown and writes the result under `outputDir`.
 *
 * Only links inside the start URL's subtree are followed. A page that fails
 * to download or convert is reported on stderr and skipped; it does not
 * abort the crawl.
 *
 * @param {string} startUrl - Page to start from; also defines the crawl scope.
 * @param {string} outputDir - Directory that receives the Markdown files.
 * @param {{maxPages?: number, depth?: number, delayMs?: number}} [options]
 *   Overrides for DEFAULT_CRAWL_OPTIONS. Entries that are `undefined`,
 *   `null` or `NaN` are ignored so that an unset CLI flag cannot wipe out a
 *   default.
 * @returns {Promise<{filesWritten: number, pages: string[]}>} Number of
 *   files written and the normalized URLs of the pages they came from.
 */
export async function crawlSite(startUrl, outputDir, options) {
    // A plain spread would let `{ maxPages: undefined }` replace the default
    // with undefined, making `filesWritten < opts.maxPages` false from the
    // start and the crawl a silent no-op.
    const overrides = Object.fromEntries(Object.entries(options ?? {}).filter(([, value]) => value != null && !Number.isNaN(value)));
    const opts = { ...DEFAULT_CRAWL_OPTIONS, ...overrides };
    const scope = normalizeUrl(startUrl);
    const visited = new Set();
    const queue = [{ url: scope, depth: 0 }];
    const pages = [];
    let filesWritten = 0;
    let requestsMade = 0;
    while (queue.length > 0 && filesWritten < opts.maxPages) {
        const item = queue.shift();
        const normalized = normalizeUrl(item.url);
        if (visited.has(normalized)) {
            continue;
        }
        visited.add(normalized);
        // Throttle before every request except the first, so the delay also
        // applies after non-HTML responses and after failed requests.
        if (requestsMade > 0) {
            await sleep(opts.delayMs);
        }
        requestsMade++;
        try {
            const { html, contentType } = await fetchPage(normalized);
            // Only process HTML pages (media types are case-insensitive).
            if (!contentType.toLowerCase().includes("text/html")) {
                continue;
            }
            const markdown = htmlToMarkdown(html, normalized);
            const fullPath = join(outputDir, urlToFilePath(normalized));
            mkdirSync(dirname(fullPath), { recursive: true });
            writeFileSync(fullPath, markdown, "utf-8");
            filesWritten++;
            pages.push(normalized);
            // Discover links if we haven't hit max depth
            if (item.depth < opts.depth) {
                for (const link of discoverLinks(html, normalized, scope)) {
                    if (!visited.has(normalizeUrl(link))) {
                        queue.push({ url: link, depth: item.depth + 1 });
                    }
                }
            }
        }
        catch (err) {
            // Log but don't fail the whole crawl for one bad page
            const msg = err instanceof Error ? err.message : String(err);
            process.stderr.write(`Warning: skipping ${normalized} (${msg})\n`);
        }
    }
    // The limit was the reason for stopping only if work was still queued.
    const hitLimit = filesWritten >= opts.maxPages && queue.length > 0;
    if (hitLimit) {
        process.stderr.write(`Reached ${opts.maxPages} page limit. Use --max-pages to increase.\n`);
    }
    return { filesWritten, pages };
}
222
+ //# sourceMappingURL=crawl.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawl.js","sourceRoot":"","sources":["../../src/crawl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACnD,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,KAAK,EAAE,MAAM,OAAO,CAAC;AAkB9B,MAAM,qBAAqB,GAAiB;IAC1C,QAAQ,EAAE,GAAG;IACb,KAAK,EAAE,CAAC;IACR,OAAO,EAAE,GAAG;CACb,CAAC;AAEF,MAAM,UAAU,GAAG,mBAAmB,CAAC;AAEvC,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC;QACZ,4CAA4C;QAC5C,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACtD,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,CAAC,CAAC,QAAQ,EAAE,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAAC,SAAiB,EAAE,KAAa;IACjD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACxC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;IAChC,IAAI,YAAY,CAAC,MAAM,KAAK,QAAQ,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1D,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;QAC/C,CAAC,CAAC,QAAQ,CAAC,QAAQ;QACnB,CAAC,CAAC,QAAQ,CAAC,QAAQ,GAAG,GAAG,CAAC;IAC5B,OAAO,CACL,YAAY,CAAC,QAAQ,KAAK,QAAQ,CAAC,QAAQ;QAC3C,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,CAC5C,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,IAAI,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC;IACtB,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC;IAC9C,IAAI,IAAI,KAAK,EAAE;QAAE,IAAI,GAAG,QAAQ,CAAC;IACjC,sBAAsB;IACtB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAC/B,wEAAwE;IACxE,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1B,IAAI,CAAC,GAAG,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QAC9C,IAAI,GAAG,IAAI,CAAC,OAAO,
CAAC,eAAe,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC;IACnD,CAAC;IACD,+CAA+C;IAC/C,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,IAAI,GAAG,KAAK,CAAC;IACtB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,GAAW;IAC/C,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;IAE/B,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;QACnC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,GAAG;KACtB,CAAC,CAAC;IAEH,IAAI,OAAO,EAAE,OAAO,EAAE,CAAC;QACrB,IAAI,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5C,0DAA0D;QAC1D,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,EAAE,GAAG,KAAK,OAAO,CAAC,KAAK,OAAO,EAAE,EAAE,CAAC;QACrC,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,mCAAmC;IACnC,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,aAAa,CAAC,IAAY,EAAE,OAAe,EAAE,KAAa;IACjE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAE9B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,2CAA2C;QAC3C,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YACzF,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;YAChD,MAAM,UAAU,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;YAE1C,iDAAiD;YACjD,IAAI,SAAS,CAAC,UAAU,EAAE,KAAK,CAAC,EAAE,CAAC;gBACjC,kCAAkC;gBAClC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;gBAChE,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;oBAAE,OAAO;gBACxD,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,oBAAoB;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;AAC7B,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,GAAW;IAClC,MAAM,QAAQ,GAAG,MAAM,
KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE;YACP,YAAY,EAAE,UAAU;YACxB,MAAM,EAAE,wCAAwC;SACjD;QACD,QAAQ,EAAE,QAAQ;KACnB,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,aAAa,GAAG,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IAC/D,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACnC,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;AAC/B,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QACrD,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,KAAK,YAAY,CAAC;IAChD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACvB,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC5B,IAAI,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACzC,IAAI,CAAC,IAAI;QAAE,IAAI,GAAG,OAAO,CAAC;IAC1B,OAAO,YAAY,QAAQ,IAAI,IAAI,EAAE,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACvB,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC5B,IAAI,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC7D,IAAI,CAAC,IAAI;QAAE,IAAI,GAAG,MAAM,CAAC;IACzB,OAAO,YAAY,QAAQ,IAAI,IAAI,EAAE,CAAC;AACxC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,GAAW;IAC/C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE,EAAE,YAAY,EAAE,UAAU,EAAE;QACrC,QAAQ,EAAE,QAAQ;KACnB,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,aAAa,GAAG,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;IAC/D,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtC,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,QAAgB,EAChB,
SAAiB,EACjB,OAA+B;IAE/B,MAAM,IAAI,GAAG,EAAE,GAAG,qBAAqB,EAAE,GAAG,OAAO,EAAE,CAAC;IACtD,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,MAAM,KAAK,GAAqC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;IAC3E,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;QACxD,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;QAC5B,MAAM,UAAU,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE1C,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;YAAE,SAAS;QACtC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAExB,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,CAAC;YAE1D,0BAA0B;YAC1B,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC;gBAAE,SAAS;YAEjD,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;YAClD,MAAM,QAAQ,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC;YAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAE3C,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAClD,aAAa,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC3C,YAAY,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAEvB,6CAA6C;YAC7C,IAAI,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC5B,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC;gBACrD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;wBACrC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC;oBACnD,CAAC;gBACH,CAAC;YACH,CAAC;YAED,4BAA4B;YAC5B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,sDAAsD;YACtD,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,qBAAqB,UAAU,KAAK,GAAG,KAAK,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,YAAY,IAAI,IAAI,CAAC,QAAQ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;IACnE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,WAAW,IAAI,CAAC,QAAQ,6CAA6C
,CACtE,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC;AACjC,CAAC"}
package/dist/src/index.js CHANGED
@@ -1,22 +1,31 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from "commander";
3
3
  import { join } from "node:path";
4
- import { loadConfig, configExists, initConfig } from "./config.js";
4
+ import { existsSync } from "node:fs";
5
+ import { loadConfig, configExists, initConfig, loadGlobalConfig, initGlobalConfig, getGlobalConfigDir } from "./config.js";
5
6
  import { buildIndex, loadPersistedIndex } from "./indexer.js";
6
- import { search } from "./search.js";
7
- import { addFromUrl, addLocalPath, removePath, updateSources } from "./add.js";
7
+ import { search, mergeSearchResults } from "./search.js";
8
+ import { addFromGitHub, addFromFileUrl, addFromCrawl, addLocalPath, removePath, updateSources } from "./add.js";
9
+ import { isGitHubUrl } from "./crawl.js";
8
10
  const program = new Command();
9
11
  program
10
12
  .name("refdocs")
11
13
  .description("Local CLI tool for indexing and searching markdown documentation")
12
- .version("0.3.0");
14
+ .version("0.5.0");
13
15
  program
14
16
  .command("init")
15
17
  .description("Create a .refdocs.json config file with defaults")
16
- .action(() => {
18
+ .option("-g, --global", "initialize global config at ~/.refdocs/")
19
+ .action((opts) => {
17
20
  try {
18
- initConfig(process.cwd());
19
- console.log("Created .refdocs.json with default configuration.");
21
+ if (opts.global) {
22
+ initGlobalConfig();
23
+ console.log(`Created global config at ${getGlobalConfigDir()}/.refdocs.json`);
24
+ }
25
+ else {
26
+ initConfig(process.cwd());
27
+ console.log("Created .refdocs.json with default configuration.");
28
+ }
20
29
  }
21
30
  catch (err) {
22
31
  console.error(err.message);
@@ -26,13 +35,27 @@ program
26
35
  program
27
36
  .command("index")
28
37
  .description("Index all markdown files in configured paths")
29
- .action(() => {
38
+ .option("-g, --global", "index the global config")
39
+ .action((opts) => {
30
40
  try {
31
- const { config, configDir } = loadConfig();
32
- const summary = buildIndex(config, configDir);
33
- console.log(`Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
34
- console.log(`Index size: ${(summary.indexSizeBytes / 1024).toFixed(1)} KB`);
35
- console.log(`Done in ${summary.elapsedMs}ms`);
41
+ if (opts.global) {
42
+ const globalResult = loadGlobalConfig();
43
+ if (!globalResult) {
44
+ console.error("No global config found. Run `refdocs init --global` first.");
45
+ process.exit(1);
46
+ }
47
+ const summary = buildIndex(globalResult.config, globalResult.configDir);
48
+ console.log(`[global] Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
49
+ console.log(`Index size: ${(summary.indexSizeBytes / 1024).toFixed(1)} KB`);
50
+ console.log(`Done in ${summary.elapsedMs}ms`);
51
+ }
52
+ else {
53
+ const { config, configDir } = loadConfig();
54
+ const summary = buildIndex(config, configDir);
55
+ console.log(`Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
56
+ console.log(`Index size: ${(summary.indexSizeBytes / 1024).toFixed(1)} KB`);
57
+ console.log(`Done in ${summary.elapsedMs}ms`);
58
+ }
36
59
  }
37
60
  catch (err) {
38
61
  console.error(err.message);
@@ -48,14 +71,44 @@ program
48
71
  .option("--raw", "output chunk body only, no metadata")
49
72
  .action((query, opts) => {
50
73
  try {
51
- const { config, configDir } = loadConfig();
52
- const indexPath = join(configDir, config.index);
53
- const { index } = loadPersistedIndex(indexPath, config);
54
74
  const maxResults = Math.min(Math.max(1, parseInt(opts.results, 10) || 3), 10);
55
- const results = search(index, query, {
56
- maxResults,
57
- fileFilter: opts.file,
58
- });
75
+ let localResults = [];
76
+ let hasLocalIndex = false;
77
+ let localError = null;
78
+ try {
79
+ const { config, configDir } = loadConfig();
80
+ const indexPath = join(configDir, config.index);
81
+ const { index } = loadPersistedIndex(indexPath, config);
82
+ hasLocalIndex = true;
83
+ localResults = search(index, query, { maxResults, fileFilter: opts.file });
84
+ }
85
+ catch (err) {
86
+ localError = err;
87
+ }
88
+ let globalResults = [];
89
+ let hasGlobalIndex = false;
90
+ const globalConfig = loadGlobalConfig();
91
+ if (globalConfig) {
92
+ const globalIndexPath = join(globalConfig.configDir, globalConfig.config.index);
93
+ if (existsSync(globalIndexPath)) {
94
+ try {
95
+ const { index: globalIndex } = loadPersistedIndex(globalIndexPath, globalConfig.config);
96
+ hasGlobalIndex = true;
97
+ globalResults = search(globalIndex, query, { maxResults, fileFilter: opts.file });
98
+ globalResults = globalResults.map((r) => ({
99
+ ...r,
100
+ file: `[global] ${r.file}`,
101
+ }));
102
+ }
103
+ catch {
104
+ // Global index exists but failed to load, skip
105
+ }
106
+ }
107
+ }
108
+ if (!hasLocalIndex && !hasGlobalIndex) {
109
+ throw localError ?? new Error("Index not found. Run `refdocs index` first.");
110
+ }
111
+ const results = mergeSearchResults(localResults, globalResults, maxResults);
59
112
  if (results.length === 0) {
60
113
  console.log("No results found.");
61
114
  return;
@@ -81,17 +134,31 @@ program
81
134
  program
82
135
  .command("list")
83
136
  .description("List all indexed files and their chunk counts")
84
- .action(() => {
137
+ .option("-g, --global", "list global indexed files")
138
+ .action((opts) => {
85
139
  try {
86
- const { config, configDir } = loadConfig();
140
+ let config, configDir;
141
+ if (opts.global) {
142
+ const globalResult = loadGlobalConfig();
143
+ if (!globalResult) {
144
+ console.error("No global config found. Run `refdocs init --global` first.");
145
+ process.exit(1);
146
+ }
147
+ config = globalResult.config;
148
+ configDir = globalResult.configDir;
149
+ }
150
+ else {
151
+ ({ config, configDir } = loadConfig());
152
+ }
87
153
  const indexPath = join(configDir, config.index);
88
154
  const { chunks } = loadPersistedIndex(indexPath, config);
89
155
  const byFile = new Map();
90
156
  for (const chunk of chunks) {
91
157
  byFile.set(chunk.file, (byFile.get(chunk.file) || 0) + 1);
92
158
  }
159
+ const label = opts.global ? "[global] " : "";
93
160
  for (const [file, count] of [...byFile.entries()].sort((a, b) => a[0].localeCompare(b[0]))) {
94
- console.log(`${file} (${count} chunk${count !== 1 ? "s" : ""})`);
161
+ console.log(`${label}${file} (${count} chunk${count !== 1 ? "s" : ""})`);
95
162
  }
96
163
  console.log(`\n${byFile.size} files, ${chunks.length} chunks total`);
97
164
  }
@@ -103,9 +170,22 @@ program
103
170
  program
104
171
  .command("info <file>")
105
172
  .description("Show all chunks for a specific file")
106
- .action((file) => {
173
+ .option("-g, --global", "show info from global index")
174
+ .action((file, opts) => {
107
175
  try {
108
- const { config, configDir } = loadConfig();
176
+ let config, configDir;
177
+ if (opts.global) {
178
+ const globalResult = loadGlobalConfig();
179
+ if (!globalResult) {
180
+ console.error("No global config found. Run `refdocs init --global` first.");
181
+ process.exit(1);
182
+ }
183
+ config = globalResult.config;
184
+ configDir = globalResult.configDir;
185
+ }
186
+ else {
187
+ ({ config, configDir } = loadConfig());
188
+ }
109
189
  const indexPath = join(configDir, config.index);
110
190
  const { chunks } = loadPersistedIndex(indexPath, config);
111
191
  const fileChunks = chunks.filter((c) => c.file === file);
@@ -125,36 +205,78 @@ program
125
205
  });
126
206
  program
127
207
  .command("add <source>")
128
- .description("Add a local path or download markdown docs from a GitHub URL")
208
+ .description("Add docs from a local path, GitHub URL, file URL, or crawled website")
129
209
  .option("--path <dir>", "override local storage directory")
130
210
  .option("--branch <branch>", "override branch detection from URL")
131
211
  .option("--no-index", "skip auto re-indexing after download")
212
+ .option("-g, --global", "store docs in global ~/.refdocs/ directory")
213
+ .option("--crawl", "crawl a website and convert pages to markdown")
214
+ .option("--max-pages <count>", "max pages to crawl (default: 200)")
215
+ .option("--depth <levels>", "max crawl depth (default: 3)")
132
216
  .action(async (source, opts) => {
133
217
  try {
134
- const cwd = process.cwd();
135
- if (!configExists(cwd)) {
136
- initConfig(cwd);
137
- console.log("Initialized .refdocs.json with default configuration.");
138
- }
139
- const { config, configDir } = loadConfig();
140
218
  const isUrl = source.startsWith("http://") || source.startsWith("https://");
141
- if (isUrl) {
142
- const result = await addFromUrl(source, { path: opts.path, branch: opts.branch }, configDir, config);
143
- console.log(`Downloaded ${result.filesWritten} markdown files → ${result.localPath}/`);
144
- console.log(`Source: ${result.source.owner}/${result.source.repo} (${result.source.branch})`);
219
+ if (opts.global && !isUrl) {
220
+ console.error("The --global flag can only be used with URLs, not local paths.");
221
+ process.exit(1);
222
+ }
223
+ let configDir;
224
+ let config;
225
+ if (opts.global) {
226
+ initGlobalConfig();
227
+ const globalResult = loadGlobalConfig();
228
+ if (!globalResult) {
229
+ console.error("Failed to initialize global config.");
230
+ process.exit(1);
231
+ }
232
+ configDir = globalResult.configDir;
233
+ config = globalResult.config;
234
+ }
235
+ else {
236
+ const cwd = process.cwd();
237
+ if (!configExists(cwd)) {
238
+ initConfig(cwd);
239
+ console.log("Initialized .refdocs.json with default configuration.");
240
+ }
241
+ ({ config, configDir } = loadConfig());
242
+ }
243
+ const label = opts.global ? "[global] " : "";
244
+ if (isUrl && opts.crawl) {
245
+ // Crawl mode: spider the site and convert to markdown
246
+ const maxPages = opts.maxPages ? parseInt(opts.maxPages, 10) : undefined;
247
+ const depth = opts.depth ? parseInt(opts.depth, 10) : undefined;
248
+ console.log(`Crawling ${source}...`);
249
+ const result = await addFromCrawl(source, { path: opts.path, maxPages, depth }, configDir, config);
250
+ console.log(`${label}Converted ${result.filesWritten} pages → ${result.localPath}/`);
251
+ if (opts.index && result.filesWritten > 0) {
252
+ reindex(opts.global, label);
253
+ }
254
+ }
255
+ else if (isUrl && isGitHubUrl(source)) {
256
+ // GitHub mode: download tarball
257
+ const result = await addFromGitHub(source, { path: opts.path, branch: opts.branch }, configDir, config);
258
+ console.log(`${label}Downloaded ${result.filesWritten} markdown files → ${result.localPath}/`);
259
+ if (result.source.type === "github") {
260
+ console.log(`Source: ${result.source.owner}/${result.source.repo} (${result.source.branch})`);
261
+ }
145
262
  if (opts.index && result.filesWritten > 0) {
146
- const { config: freshConfig, configDir: freshDir } = loadConfig();
147
- const summary = buildIndex(freshConfig, freshDir);
148
- console.log(`Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
263
+ reindex(opts.global, label);
264
+ }
265
+ }
266
+ else if (isUrl) {
267
+ // Single file URL mode
268
+ const result = await addFromFileUrl(source, { path: opts.path }, configDir, config);
269
+ console.log(`${label}Downloaded 1 file → ${result.localPath}`);
270
+ if (opts.index && result.filesWritten > 0) {
271
+ reindex(opts.global, label);
149
272
  }
150
273
  }
151
274
  else {
275
+ // Local path mode
152
276
  const result = addLocalPath(source, configDir, config);
153
277
  console.log(`Added ${result.localPath} to paths`);
154
278
  if (opts.index) {
155
- const { config: freshConfig, configDir: freshDir } = loadConfig();
156
- const summary = buildIndex(freshConfig, freshDir);
157
- console.log(`Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
279
+ reindex(opts.global, label);
158
280
  }
159
281
  }
160
282
  }
@@ -167,20 +289,33 @@ program
167
289
  .command("update")
168
290
  .description("Re-pull all tracked sources and re-index")
169
291
  .option("--no-index", "skip auto re-indexing after update")
292
+ .option("-g, --global", "update global sources")
170
293
  .action(async (opts) => {
171
294
  try {
172
- const { config, configDir } = loadConfig();
295
+ let config, configDir;
296
+ if (opts.global) {
297
+ const globalResult = loadGlobalConfig();
298
+ if (!globalResult) {
299
+ console.error("No global config found. Run `refdocs init --global` first.");
300
+ process.exit(1);
301
+ }
302
+ config = globalResult.config;
303
+ configDir = globalResult.configDir;
304
+ }
305
+ else {
306
+ ({ config, configDir } = loadConfig());
307
+ }
173
308
  const token = process.env.GITHUB_TOKEN ?? undefined;
174
309
  const results = await updateSources(config, configDir, token);
310
+ const label = opts.global ? "[global] " : "";
175
311
  for (const r of results) {
176
- console.log(`Updated ${r.source.owner}/${r.source.repo} ${r.filesWritten} files`);
312
+ const desc = formatSourceDescription(r.source);
313
+ console.log(`${label}Updated ${desc} → ${r.filesWritten} files`);
177
314
  }
178
315
  const totalFiles = results.reduce((sum, r) => sum + r.filesWritten, 0);
179
316
  console.log(`\n${results.length} source${results.length !== 1 ? "s" : ""} updated (${totalFiles} files total)`);
180
317
  if (opts.index && totalFiles > 0) {
181
- const { config: freshConfig, configDir: freshDir } = loadConfig();
182
- const summary = buildIndex(freshConfig, freshDir);
183
- console.log(`Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
318
+ reindex(opts.global, label);
184
319
  }
185
320
  }
186
321
  catch (err) {
@@ -192,22 +327,34 @@ program
192
327
  .command("remove <path>")
193
328
  .description("Remove a path from the index configuration")
194
329
  .option("--no-index", "skip auto re-indexing after removal")
330
+ .option("-g, --global", "remove from global config")
195
331
  .action((path, opts) => {
196
332
  try {
197
- const { config, configDir } = loadConfig();
333
+ let config, configDir;
334
+ if (opts.global) {
335
+ const globalResult = loadGlobalConfig();
336
+ if (!globalResult) {
337
+ console.error("No global config found. Run `refdocs init --global` first.");
338
+ process.exit(1);
339
+ }
340
+ config = globalResult.config;
341
+ configDir = globalResult.configDir;
342
+ }
343
+ else {
344
+ ({ config, configDir } = loadConfig());
345
+ }
198
346
  const result = removePath(path, configDir, config);
199
347
  if (!result.removed) {
200
348
  console.error(`Path "${path}" not found in configured paths.`);
201
349
  process.exit(1);
202
350
  }
203
- console.log(`Removed ${path} from paths`);
351
+ const label = opts.global ? "[global] " : "";
352
+ console.log(`${label}Removed ${path} from paths`);
204
353
  if (result.sourceRemoved) {
205
- console.log(`Removed associated source`);
354
+ console.log(`${label}Removed associated source`);
206
355
  }
207
356
  if (opts.index) {
208
- const { config: freshConfig, configDir: freshDir } = loadConfig();
209
- const summary = buildIndex(freshConfig, freshDir);
210
- console.log(`Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
357
+ reindex(opts.global, label);
211
358
  }
212
359
  }
213
360
  catch (err) {
@@ -215,6 +362,30 @@ program
215
362
  process.exit(1);
216
363
  }
217
364
  });
365
/**
 * Reloads the relevant config from disk and rebuilds its index, then prints
 * a one-line summary.
 *
 * @param {boolean} global - Rebuild the global index instead of the local one.
 * @param {string} label - Prefix for the summary line of a global rebuild
 *   (the local summary is printed without a prefix).
 */
function reindex(global, label) {
    // Load a fresh copy: the caller has just modified the config on disk.
    const target = global ? loadGlobalConfig() : loadConfig();
    if (!target) {
        // Previously this case returned silently, leaving the user with a
        // stale index and no hint as to why.
        console.error(`${label}Skipped re-indexing: global config could not be loaded.`);
        return;
    }
    const summary = buildIndex(target.config, target.configDir);
    const prefix = global ? label : "";
    console.log(`${prefix}Indexed ${summary.filesIndexed} files → ${summary.chunksCreated} chunks`);
}
379
/**
 * Produces a short human-readable name for a tracked source.
 *
 * "github" sources are shown as "owner/repo"; "file" and "crawl" sources
 * are shown by URL. A source with any other (or no) `type` falls back to
 * "owner/repo" when both fields are present, then to its URL, then to its
 * type, so the result is never `undefined`.
 *
 * @param {{type?: string, owner?: string, repo?: string, url?: string}} source
 * @returns {string}
 */
function formatSourceDescription(source) {
    switch (source.type) {
        case "github":
            return `${source.owner}/${source.repo}`;
        case "file":
        case "crawl":
            return source.url;
        default:
            // NOTE(review): entries written by versions that predate the
            // `type` field presumably carry only owner/repo — confirm
            // against the config loader.
            if (source.owner && source.repo) {
                return `${source.owner}/${source.repo}`;
            }
            return source.url ?? String(source.type ?? "unknown source");
    }
}
218
389
  function formatResults(results) {
219
390
  results.forEach((r, i) => {
220
391
  console.log(`# [${i + 1}] ${r.file}:${r.lines[0]}-${r.lines[1]}`);