@ebowwa/markdown-docs-scraper 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,448 @@
1
+ import { createRequire } from "node:module";
2
+ var __create = Object.create;
3
+ var __getProtoOf = Object.getPrototypeOf;
4
+ var __defProp = Object.defineProperty;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
7
+ var __toESM = (mod, isNodeMode, target) => {
8
+ target = mod != null ? __create(__getProtoOf(mod)) : {};
9
+ const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
10
+ for (let key of __getOwnPropNames(mod))
11
+ if (!__hasOwnProp.call(to, key))
12
+ __defProp(to, key, {
13
+ get: () => mod[key],
14
+ enumerable: true
15
+ });
16
+ return to;
17
+ };
18
+ var __require = /* @__PURE__ */ createRequire(import.meta.url);
19
+ // src/index.ts
20
+ var GENERIC_LINK_PATTERN = /\[([^\]]+)\]\((https?:\/\/[^/]+\/([^\s)]+\.md))\)/g;
21
+ function extractTitle(markdown) {
22
+ const titleMatch = markdown.match(/^#\s+(.+)$/m);
23
+ return titleMatch ? titleMatch[1].trim() : "Untitled";
24
+ }
25
+ function parsePagePath(pagePath) {
26
+ const pageName = pagePath.replace(".md", "");
27
+ const pathParts = pageName.split("/");
28
+ if (pathParts.length === 1) {
29
+ return { category: "", page: pathParts[0] };
30
+ } else if (pathParts.length === 2) {
31
+ return { category: pathParts[0], page: pathParts[1] };
32
+ } else {
33
+ return {
34
+ category: pathParts.slice(0, -1).join("/"),
35
+ page: pathParts[pathParts.length - 1]
36
+ };
37
+ }
38
+ }
39
+ async function fetchMarkdown(url, userAgent = "@ebowwa/markdown-docs-scraper") {
40
+ try {
41
+ const response = await fetch(url, {
42
+ headers: {
43
+ Accept: "text/markdown, text/plain",
44
+ "User-Agent": userAgent
45
+ }
46
+ });
47
+ if (!response.ok) {
48
+ return null;
49
+ }
50
+ return await response.text();
51
+ } catch (error) {
52
+ console.error(`Error fetching ${url}:`, error);
53
+ return null;
54
+ }
55
+ }
56
+
57
+ class MarkdownDocsScraper {
58
+ options;
59
+ constructor(options) {
60
+ this.options = {
61
+ baseUrl: options.baseUrl,
62
+ docsPath: options.docsPath || "/docs/en",
63
+ categories: options.categories || {},
64
+ outputDir: options.outputDir || "./docs",
65
+ concurrency: options.concurrency || 5,
66
+ onProgress: options.onProgress || (() => {}),
67
+ llmsPaths: options.llmsPaths || ["/llms.txt", "/docs/llms.txt"],
68
+ tryDocsSubdomain: options.tryDocsSubdomain ?? true,
69
+ linkPattern: options.linkPattern || GENERIC_LINK_PATTERN,
70
+ useDirectUrls: options.useDirectUrls ?? true
71
+ };
72
+ }
73
+ buildUrl(category, page) {
74
+ if (category) {
75
+ return `${this.options.baseUrl}${this.options.docsPath}/${category}/${page}.md`;
76
+ } else if (this.options.docsPath) {
77
+ return `${this.options.baseUrl}${this.options.docsPath}/${page}.md`;
78
+ } else {
79
+ return `${this.options.baseUrl}/${page}.md`;
80
+ }
81
+ }
82
+ async downloadPage(pageInfo) {
83
+ const url = this.options.useDirectUrls && pageInfo.fullUrl ? pageInfo.fullUrl : this.buildUrl(pageInfo.category, pageInfo.page);
84
+ const content = await fetchMarkdown(url);
85
+ if (!content) {
86
+ return null;
87
+ }
88
+ return {
89
+ url,
90
+ title: extractTitle(content),
91
+ content,
92
+ category: pageInfo.category,
93
+ pageName: pageInfo.page
94
+ };
95
+ }
96
+ getLlmsUrls() {
97
+ const urls = [];
98
+ const baseUrl = this.options.baseUrl;
99
+ for (const path of this.options.llmsPaths) {
100
+ urls.push(`${baseUrl}${path}`);
101
+ }
102
+ if (this.options.tryDocsSubdomain) {
103
+ try {
104
+ const url = new URL(baseUrl);
105
+ const hostname = url.hostname;
106
+ if (!hostname.startsWith("docs.") && !hostname.startsWith("doc.")) {
107
+ const docsDomain = hostname.replace(/^www\./, "");
108
+ urls.push(`${url.protocol}//docs.${docsDomain}/llms.txt`);
109
+ urls.push(`${url.protocol}//docs.${docsDomain}/docs/llms.txt`);
110
+ }
111
+ } catch {}
112
+ }
113
+ return urls;
114
+ }
115
+ async fetchLlmsTxt() {
116
+ const urls = this.getLlmsUrls();
117
+ console.log(`DEBUG: Trying URLs: ${urls.join(", ")}`);
118
+ for (const llmsUrl of urls) {
119
+ try {
120
+ console.log(`DEBUG: Fetching ${llmsUrl}...`);
121
+ const response = await fetch(llmsUrl, {
122
+ headers: {
123
+ Accept: "text/plain",
124
+ "User-Agent": "@ebowwa/markdown-docs-scraper"
125
+ }
126
+ });
127
+ console.log(`DEBUG: Response status: ${response.status}`);
128
+ if (response.ok) {
129
+ const content = await response.text();
130
+ console.log(`Found llms.txt at ${llmsUrl}`);
131
+ return { content, url: llmsUrl };
132
+ }
133
+ } catch (error) {
134
+ console.log(`DEBUG: Error: ${error}`);
135
+ continue;
136
+ }
137
+ }
138
+ return null;
139
+ }
140
+ async discoverPages() {
141
+ const pages = [];
142
+ try {
143
+ const llmsResult = await this.fetchLlmsTxt();
144
+ if (!llmsResult) {
145
+ const attemptedUrls = this.getLlmsUrls();
146
+ console.warn(`Could not fetch llms.txt from any of: ${attemptedUrls.join(", ")}`);
147
+ return pages;
148
+ }
149
+ const { content } = llmsResult;
150
+ const pattern = this.options.linkPattern;
151
+ const regex = new RegExp(pattern.source, pattern.flags);
152
+ let match;
153
+ console.log(`DEBUG: Using pattern: ${pattern.source}`);
154
+ console.log(`DEBUG: Content length: ${content.length}`);
155
+ while ((match = regex.exec(content)) !== null) {
156
+ const fullUrl = match[2];
157
+ const pagePath = match[3];
158
+ const { category, page } = parsePagePath(pagePath);
159
+ pages.push({ category, page, fullUrl });
160
+ }
161
+ console.log(`Discovered ${pages.length} pages from llms.txt`);
162
+ } catch (error) {
163
+ console.error("Error discovering pages:", error);
164
+ }
165
+ return pages;
166
+ }
167
+ async scrapeFromLlms() {
168
+ const startTime = Date.now();
169
+ const downloaded = [];
170
+ const failed = [];
171
+ const pages = await this.discoverPages();
172
+ if (pages.length === 0) {
173
+ console.log("No pages discovered, falling back to categories");
174
+ return this.scrape();
175
+ }
176
+ console.log(`Scraping ${pages.length} discovered pages...`);
177
+ for (let i = 0;i < pages.length; i += this.options.concurrency) {
178
+ const batch = pages.slice(i, i + this.options.concurrency);
179
+ const results = await Promise.allSettled(batch.map((page) => this.downloadPage(page)));
180
+ results.forEach((result, index) => {
181
+ const page = batch[index];
182
+ if (result.status === "fulfilled" && result.value) {
183
+ downloaded.push(result.value);
184
+ } else {
185
+ const url = this.options.useDirectUrls && page.fullUrl ? page.fullUrl : this.buildUrl(page.category, page.page);
186
+ failed.push({
187
+ url,
188
+ error: result.status === "rejected" ? result.reason : "Not found"
189
+ });
190
+ }
191
+ this.options.onProgress(downloaded.length + failed.length, pages.length);
192
+ });
193
+ }
194
+ const duration = Date.now() - startTime;
195
+ console.log(`✅ Downloaded: ${downloaded.length} pages`);
196
+ console.log(`❌ Failed: ${failed.length} pages`);
197
+ console.log(`⏱️ Duration: ${(duration / 1000).toFixed(2)}s`);
198
+ return { downloaded, failed, duration };
199
+ }
200
+ async scrape() {
201
+ const startTime = Date.now();
202
+ const downloaded = [];
203
+ const failed = [];
204
+ const pages = this.getPagesToScrape();
205
+ const total = pages.length;
206
+ console.log(`Scraping ${total} pages from ${this.options.baseUrl}...`);
207
+ for (let i = 0;i < pages.length; i += this.options.concurrency) {
208
+ const batch = pages.slice(i, i + this.options.concurrency);
209
+ const results = await Promise.allSettled(batch.map((page) => this.downloadPage({ ...page, fullUrl: "" })));
210
+ results.forEach((result, index) => {
211
+ const page = batch[index];
212
+ if (result.status === "fulfilled" && result.value) {
213
+ downloaded.push(result.value);
214
+ } else {
215
+ failed.push({
216
+ url: this.buildUrl(page.category, page.page),
217
+ error: result.status === "rejected" ? result.reason : "Not found"
218
+ });
219
+ }
220
+ this.options.onProgress(downloaded.length + failed.length, total);
221
+ });
222
+ }
223
+ const duration = Date.now() - startTime;
224
+ console.log(`✅ Downloaded: ${downloaded.length} pages`);
225
+ console.log(`❌ Failed: ${failed.length} pages`);
226
+ console.log(`⏱️ Duration: ${(duration / 1000).toFixed(2)}s`);
227
+ return { downloaded, failed, duration };
228
+ }
229
+ extractBody(content) {
230
+ const headerRegex = /^<!--\nSource: [^\n]+\nDownloaded: [^\n]+\n-->\n\n/;
231
+ return content.replace(headerRegex, "");
232
+ }
233
+ async savePages(pages) {
234
+ const fs = await import("fs/promises");
235
+ const path = await import("path");
236
+ let updated = 0;
237
+ let skipped = 0;
238
+ for (const page of pages) {
239
+ const nameToUse = page.pageName || page.url.split("/").pop()?.replace(".md", "") || "untitled";
240
+ const dir = page.category ? path.join(this.options.outputDir, page.category) : this.options.outputDir;
241
+ await fs.mkdir(dir, { recursive: true });
242
+ const filepath = path.join(dir, `${nameToUse}.md`);
243
+ try {
244
+ const existingContent = await fs.readFile(filepath, "utf-8");
245
+ const existingBody = this.extractBody(existingContent);
246
+ if (existingBody === page.content) {
247
+ skipped++;
248
+ continue;
249
+ }
250
+ } catch {}
251
+ const header = `<!--
252
+ Source: ${page.url}
253
+ Downloaded: ${new Date().toISOString()}
254
+ -->
255
+
256
+ `;
257
+ await fs.writeFile(filepath, header + page.content, "utf-8");
258
+ updated++;
259
+ }
260
+ return { updated, skipped };
261
+ }
262
+ getPagesToScrape() {
263
+ const pages = [];
264
+ for (const [category, pageList] of Object.entries(this.options.categories)) {
265
+ for (const page of pageList) {
266
+ pages.push({ category, page, fullUrl: "" });
267
+ }
268
+ }
269
+ return pages;
270
+ }
271
+ }
272
+ async function scrapeMarkdownDocs(options) {
273
+ const scraper = new MarkdownDocsScraper(options);
274
+ const result = options.useLlms ? await scraper.scrapeFromLlms() : await scraper.scrape();
275
+ let saveStats;
276
+ if (options.outputDir) {
277
+ saveStats = await scraper.savePages(result.downloaded);
278
+ if (saveStats.updated > 0 || saveStats.skipped > 0) {
279
+ console.log(` Saved: ${saveStats.updated} updated, ${saveStats.skipped} unchanged`);
280
+ }
281
+ }
282
+ return { ...result, saveStats };
283
+ }
284
+
285
+ // src/scrapers/llms-txt.ts
286
+ var CLAUDE_CODE_PATTERN = /\[([^\]]+)\]\((https?:\/\/[^\s)]+\/docs\/en\/([^)]+\.md))\)/g;
287
+ var GENERIC_PATTERN = /\[([^\]]+)\]\((https?:\/\/[^/]+\/([^\s)]+\.md))\)/g;
288
+ var llmsTxtScraper = {
289
+ type: "llms-txt",
290
+ async scrape(config) {
291
+ const options = getScraperOptions(config);
292
+ const result = await scrapeMarkdownDocs(options);
293
+ const downloaded = result.downloaded.map((page) => {
294
+ const category = page.category || "";
295
+ const filename = `${page.pageName || "untitled"}.md`;
296
+ const path = category ? `${category}/${filename}` : filename;
297
+ return {
298
+ success: true,
299
+ path,
300
+ title: page.title
301
+ };
302
+ });
303
+ return {
304
+ downloaded,
305
+ failed: result.failed,
306
+ duration: result.duration
307
+ };
308
+ }
309
+ };
310
+ function getScraperOptions(config) {
311
+ const baseOptions = {
312
+ baseUrl: config.baseUrl,
313
+ docsPath: config.docsPath,
314
+ outputDir: config.outputDir,
315
+ concurrency: 10,
316
+ useLlms: true,
317
+ tryDocsSubdomain: false
318
+ };
319
+ if (config.name === "Claude Code") {
320
+ return {
321
+ ...baseOptions,
322
+ llmsPaths: ["/docs/llms.txt"],
323
+ linkPattern: CLAUDE_CODE_PATTERN
324
+ };
325
+ }
326
+ if (config.name === "Polymarket") {
327
+ return {
328
+ ...baseOptions,
329
+ llmsPaths: ["/llms.txt"],
330
+ linkPattern: GENERIC_PATTERN
331
+ };
332
+ }
333
+ if (config.name === "Bun") {
334
+ return {
335
+ ...baseOptions,
336
+ llmsPaths: ["/docs/llms.txt", "/llms.txt"],
337
+ linkPattern: GENERIC_PATTERN
338
+ };
339
+ }
340
+ return {
341
+ ...baseOptions,
342
+ llmsPaths: config.llmsTxtPath ? [config.llmsTxtPath] : ["/llms.txt", "/docs/llms.txt"],
343
+ linkPattern: config.linkPattern || GENERIC_PATTERN
344
+ };
345
+ }
346
+ // src/scrapers/github-raw.ts
347
+ var githubRawScraper = {
348
+ type: "github-raw",
349
+ async scrape(config) {
350
+ const startTime = Date.now();
351
+ const downloaded = [];
352
+ const failed = [];
353
+ if (!config.github?.repo) {
354
+ throw new Error(`GitHub source "${config.name}" missing github.repo config`);
355
+ }
356
+ const files = await fetchGitHubMarkdownFiles(config.github.repo, config.docsPath.replace(/^\//, ""));
357
+ for (const file of files) {
358
+ const content = await fetchGitHubRawContent(config.github.repo, file.path);
359
+ if (content) {
360
+ downloaded.push({
361
+ success: true,
362
+ path: file.name,
363
+ title: extractTitle2(content) || file.name.replace(".md", "")
364
+ });
365
+ await saveFile(config.outputDir, file.name, content);
366
+ } else {
367
+ failed.push({
368
+ url: `https://raw.githubusercontent.com/${config.github.repo}/main/${file.path}`,
369
+ error: "Failed to fetch content"
370
+ });
371
+ }
372
+ }
373
+ return {
374
+ downloaded,
375
+ failed,
376
+ duration: Date.now() - startTime
377
+ };
378
+ }
379
+ };
380
+ async function fetchGitHubMarkdownFiles(repo, path) {
381
+ const url = `https://api.github.com/repos/${repo}/contents/${path}`;
382
+ const response = await fetch(url, {
383
+ headers: {
384
+ Accept: "application/vnd.github.v3+json",
385
+ "User-Agent": "@ebowwa/markdown-docs-scraper"
386
+ }
387
+ });
388
+ if (!response.ok) {
389
+ throw new Error(`GitHub API error: ${response.status} ${response.statusText}`);
390
+ }
391
+ const contents = await response.json();
392
+ return contents.filter((item) => item.type === "file" && item.name.endsWith(".md"));
393
+ }
394
+ async function fetchGitHubRawContent(repo, path) {
395
+ const url = `https://raw.githubusercontent.com/${repo}/main/${path}`;
396
+ try {
397
+ const response = await fetch(url, {
398
+ headers: {
399
+ Accept: "text/plain",
400
+ "User-Agent": "@ebowwa/markdown-docs-scraper"
401
+ }
402
+ });
403
+ if (!response.ok) {
404
+ return null;
405
+ }
406
+ return await response.text();
407
+ } catch (error) {
408
+ console.error(`Error fetching ${url}:`, error);
409
+ return null;
410
+ }
411
+ }
412
+ function extractTitle2(markdown) {
413
+ const titleMatch = markdown.match(/^#\s+(.+)$/m);
414
+ return titleMatch ? titleMatch[1].trim() : null;
415
+ }
416
+ async function saveFile(outputDir, filename, content) {
417
+ const fs = await import("fs/promises");
418
+ const path = await import("path");
419
+ const outputPath = path.join(outputDir, filename);
420
+ await fs.mkdir(path.dirname(outputPath), { recursive: true });
421
+ await fs.writeFile(outputPath, content, "utf-8");
422
+ }
423
+ // src/scrapers/registry.ts
424
+ var scrapers = new Map;
425
+ function registerScraper(scraper) {
426
+ scrapers.set(scraper.type, scraper);
427
+ }
428
+ function getScraper(type) {
429
+ return scrapers.get(type);
430
+ }
431
+ async function scrapeSource(config) {
432
+ const scraper = scrapers.get(config.sourceType);
433
+ if (!scraper) {
434
+ throw new Error(`No scraper registered for type: ${config.sourceType}`);
435
+ }
436
+ return scraper.scrape(config);
437
+ }
438
+ registerScraper(llmsTxtScraper);
439
+ registerScraper(githubRawScraper);
440
+ export {
441
+ scrapeSource,
442
+ registerScraper,
443
+ llmsTxtScraper,
444
+ githubRawScraper,
445
+ getScraper,
446
+ GENERIC_PATTERN,
447
+ CLAUDE_CODE_PATTERN
448
+ };
@@ -0,0 +1,13 @@
1
+ /**
2
+ * LLMS-TXT Scraper
3
+ *
4
+ * Scrapes documentation sites that provide llms.txt index files.
5
+ * Uses the core MarkdownDocsScraper under the hood.
6
+ */
7
+ import type { Scraper } from "./types";
8
+ /** Pattern for Claude Code docs: /docs/en/page.md */
9
+ export declare const CLAUDE_CODE_PATTERN: RegExp;
10
+ /** Pattern for generic docs: any domain/path.md */
11
+ export declare const GENERIC_PATTERN: RegExp;
12
+ export declare const llmsTxtScraper: Scraper;
13
+ //# sourceMappingURL=llms-txt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llms-txt.d.ts","sourceRoot":"","sources":["../../src/scrapers/llms-txt.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,OAAO,EAA8C,MAAM,SAAS,CAAC;AAMnF,qDAAqD;AACrD,eAAO,MAAM,mBAAmB,QAAiE,CAAC;AAElG,mDAAmD;AACnD,eAAO,MAAM,eAAe,QAAuD,CAAC;AAMpF,eAAO,MAAM,cAAc,EAAE,OA0B5B,CAAC"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Scraper Registry
3
+ *
4
+ * Maps source types to scraper implementations.
5
+ * Allows registering new scrapers and looking them up by type.
6
+ */
7
+ import type { Scraper, SourceType, SourceConfig, ScrapeResult } from "./types";
8
+ import { llmsTxtScraper } from "./llms-txt";
9
+ import { githubRawScraper } from "./github-raw";
10
+ /**
11
+ * Register a scraper implementation
12
+ */
13
+ export declare function registerScraper(scraper: Scraper): void;
14
+ /**
15
+ * Get a scraper by type
16
+ */
17
+ export declare function getScraper(type: SourceType): Scraper | undefined;
18
+ /**
19
+ * Scrape a source using the appropriate scraper
20
+ */
21
+ export declare function scrapeSource(config: SourceConfig): Promise<ScrapeResult>;
22
+ export { llmsTxtScraper, githubRawScraper };
23
+ //# sourceMappingURL=registry.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/scrapers/registry.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAC5C,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAShD;;GAEG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAEtD;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,GAAG,SAAS,CAEhE;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAQ9E;AAWD,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,CAAC"}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Scraper Types
3
+ *
4
+ * Core types for the composable scraper architecture.
5
+ * These types define the interface that all scrapers must implement.
6
+ */
7
+ /** Supported documentation source types */
8
+ export type SourceType = "llms-txt" | "github-raw";
9
+ /** Result from scraping a source */
10
+ export interface ScrapeResult {
11
+ downloaded: DownloadResult[];
12
+ failed: Array<{
13
+ url: string;
14
+ error: string;
15
+ }>;
16
+ duration?: number;
17
+ }
18
+ /** Individual download result */
19
+ export interface DownloadResult {
20
+ success: boolean;
21
+ path: string;
22
+ title?: string;
23
+ }
24
+ /** Scraper interface - all scrapers must implement this */
25
+ export interface Scraper {
26
+ /** Source type identifier */
27
+ type: SourceType;
28
+ /** Scrape documentation from a source */
29
+ scrape(config: SourceConfig): Promise<ScrapeResult>;
30
+ }
31
+ /** Source configuration */
32
+ export interface SourceConfig {
33
+ /** Display name */
34
+ name: string;
35
+ /** Source type - determines which scraper to use */
36
+ sourceType: SourceType;
37
+ /** Base URL for the documentation */
38
+ baseUrl: string;
39
+ /** Path to docs (e.g., /docs, /docs/en) */
40
+ docsPath: string;
41
+ /** Output directory for downloaded docs */
42
+ outputDir: string;
43
+ /** Output directory for daily reports */
44
+ reportDir: string;
45
+ /** llms.txt path (for llms-txt sources) */
46
+ llmsTxtPath?: string;
47
+ /** Custom link pattern for llms.txt parsing */
48
+ linkPattern?: RegExp;
49
+ /** GitHub config (for github-raw sources or GitHub API data) */
50
+ github?: {
51
+ repo: string;
52
+ includeCommits: boolean;
53
+ includeReleases: boolean;
54
+ includePRs: boolean;
55
+ };
56
+ }
57
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/scrapers/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,2CAA2C;AAC3C,MAAM,MAAM,UAAU,GAAG,UAAU,GAAG,YAAY,CAAC;AAMnD,oCAAoC;AACpC,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,cAAc,EAAE,CAAC;IAC7B,MAAM,EAAE,KAAK,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,iCAAiC;AACjC,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,2DAA2D;AAC3D,MAAM,WAAW,OAAO;IACtB,6BAA6B;IAC7B,IAAI,EAAE,UAAU,CAAC;IAEjB,yCAAyC;IACzC,MAAM,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;CACrD;AAMD,2BAA2B;AAC3B,MAAM,WAAW,YAAY;IAC3B,mBAAmB;IACnB,IAAI,EAAE,MAAM,CAAC;IAEb,oDAAoD;IACpD,UAAU,EAAE,UAAU,CAAC;IAEvB,qCAAqC;IACrC,OAAO,EAAE,MAAM,CAAC;IAEhB,2CAA2C;IAC3C,QAAQ,EAAE,MAAM,CAAC;IAEjB,2CAA2C;IAC3C,SAAS,EAAE,MAAM,CAAC;IAElB,yCAAyC;IACzC,SAAS,EAAE,MAAM,CAAC;IAElB,2CAA2C;IAC3C,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,+CAA+C;IAC/C,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,gEAAgE;IAChE,MAAM,CAAC,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,cAAc,EAAE,OAAO,CAAC;QACxB,eAAe,EAAE,OAAO,CAAC;QACzB,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC;CACH"}
package/package.json CHANGED
@@ -1,6 +1,10 @@
1
1
  {
2
2
  "name": "@ebowwa/markdown-docs-scraper",
3
- "version": "1.1.0",
3
+ "version": "1.2.1",
4
+ "files": [
5
+ "dist",
6
+ "src"
7
+ ],
4
8
  "description": "Scrape and mirror markdown-based documentation sites",
5
9
  "type": "module",
6
10
  "main": "./dist/index.js",
@@ -13,13 +17,17 @@
13
17
  "import": "./dist/index.js",
14
18
  "types": "./dist/index.d.ts"
15
19
  },
20
+ "./scrapers": {
21
+ "import": "./dist/scrapers/index.js",
22
+ "types": "./dist/scrapers/index.d.ts"
23
+ },
16
24
  "./cli": {
17
25
  "import": "./dist/cli.js",
18
26
  "types": "./dist/cli.d.ts"
19
27
  }
20
28
  },
21
29
  "scripts": {
22
- "build": "bun build src/index.ts --outdir dist --target node && bun build src/cli.ts --outdir dist --target node",
30
+ "build": "bun build src/index.ts --outdir dist --target node && mkdir -p dist/scrapers && bun build src/scrapers/index.ts --outdir dist/scrapers --target node && bun build src/cli.ts --outdir dist --target node && tsc --build",
23
31
  "dev": "bun run src/cli.ts",
24
32
  "test": "bun test",
25
33
  "prepublishOnly": "bun run build"