@memvid/maw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +188 -0
  2. package/dist/bin/maw.d.ts +6 -0
  3. package/dist/bin/maw.d.ts.map +1 -0
  4. package/dist/bin/maw.js +275 -0
  5. package/dist/bin/maw.js.map +1 -0
  6. package/dist/src/crawler/index.d.ts +71 -0
  7. package/dist/src/crawler/index.d.ts.map +1 -0
  8. package/dist/src/crawler/index.js +249 -0
  9. package/dist/src/crawler/index.js.map +1 -0
  10. package/dist/src/crawler/robots.d.ts +26 -0
  11. package/dist/src/crawler/robots.d.ts.map +1 -0
  12. package/dist/src/crawler/robots.js +179 -0
  13. package/dist/src/crawler/robots.js.map +1 -0
  14. package/dist/src/crawler/sitemap.d.ts +36 -0
  15. package/dist/src/crawler/sitemap.d.ts.map +1 -0
  16. package/dist/src/crawler/sitemap.js +209 -0
  17. package/dist/src/crawler/sitemap.js.map +1 -0
  18. package/dist/src/engine/detector.d.ts +18 -0
  19. package/dist/src/engine/detector.d.ts.map +1 -0
  20. package/dist/src/engine/detector.js +155 -0
  21. package/dist/src/engine/detector.js.map +1 -0
  22. package/dist/src/engine/fetch.d.ts +18 -0
  23. package/dist/src/engine/fetch.d.ts.map +1 -0
  24. package/dist/src/engine/fetch.js +53 -0
  25. package/dist/src/engine/fetch.js.map +1 -0
  26. package/dist/src/engine/index.d.ts +39 -0
  27. package/dist/src/engine/index.d.ts.map +1 -0
  28. package/dist/src/engine/index.js +116 -0
  29. package/dist/src/engine/index.js.map +1 -0
  30. package/dist/src/engine/playwright.d.ts +23 -0
  31. package/dist/src/engine/playwright.d.ts.map +1 -0
  32. package/dist/src/engine/playwright.js +88 -0
  33. package/dist/src/engine/playwright.js.map +1 -0
  34. package/dist/src/engine/rebrowser.d.ts +22 -0
  35. package/dist/src/engine/rebrowser.d.ts.map +1 -0
  36. package/dist/src/engine/rebrowser.js +142 -0
  37. package/dist/src/engine/rebrowser.js.map +1 -0
  38. package/dist/src/extractor/cleaner.d.ts +13 -0
  39. package/dist/src/extractor/cleaner.d.ts.map +1 -0
  40. package/dist/src/extractor/cleaner.js +122 -0
  41. package/dist/src/extractor/cleaner.js.map +1 -0
  42. package/dist/src/extractor/index.d.ts +29 -0
  43. package/dist/src/extractor/index.d.ts.map +1 -0
  44. package/dist/src/extractor/index.js +162 -0
  45. package/dist/src/extractor/index.js.map +1 -0
  46. package/dist/src/extractor/links.d.ts +22 -0
  47. package/dist/src/extractor/links.d.ts.map +1 -0
  48. package/dist/src/extractor/links.js +92 -0
  49. package/dist/src/extractor/links.js.map +1 -0
  50. package/dist/src/extractor/markdown.d.ts +13 -0
  51. package/dist/src/extractor/markdown.d.ts.map +1 -0
  52. package/dist/src/extractor/markdown.js +94 -0
  53. package/dist/src/extractor/markdown.js.map +1 -0
  54. package/dist/src/git/index.d.ts +40 -0
  55. package/dist/src/git/index.d.ts.map +1 -0
  56. package/dist/src/git/index.js +303 -0
  57. package/dist/src/git/index.js.map +1 -0
  58. package/dist/src/index.d.ts +103 -0
  59. package/dist/src/index.d.ts.map +1 -0
  60. package/dist/src/index.js +229 -0
  61. package/dist/src/index.js.map +1 -0
  62. package/dist/src/ingestor/index.d.ts +95 -0
  63. package/dist/src/ingestor/index.d.ts.map +1 -0
  64. package/dist/src/ingestor/index.js +471 -0
  65. package/dist/src/ingestor/index.js.map +1 -0
  66. package/dist/src/utils/dedup.d.ts +66 -0
  67. package/dist/src/utils/dedup.d.ts.map +1 -0
  68. package/dist/src/utils/dedup.js +296 -0
  69. package/dist/src/utils/dedup.js.map +1 -0
  70. package/dist/src/utils/index.d.ts +3 -0
  71. package/dist/src/utils/index.d.ts.map +1 -0
  72. package/dist/src/utils/index.js +3 -0
  73. package/dist/src/utils/index.js.map +1 -0
  74. package/dist/src/utils/logger.d.ts +12 -0
  75. package/dist/src/utils/logger.d.ts.map +1 -0
  76. package/dist/src/utils/logger.js +49 -0
  77. package/dist/src/utils/logger.js.map +1 -0
  78. package/dist/src/utils/ui.d.ts +126 -0
  79. package/dist/src/utils/ui.d.ts.map +1 -0
  80. package/dist/src/utils/ui.js +357 -0
  81. package/dist/src/utils/ui.js.map +1 -0
  82. package/dist/src/utils/url.d.ts +21 -0
  83. package/dist/src/utils/url.d.ts.map +1 -0
  84. package/dist/src/utils/url.js +107 -0
  85. package/dist/src/utils/url.js.map +1 -0
  86. package/package.json +71 -0
package/README.md ADDED
@@ -0,0 +1,188 @@
1
+ <div align="center">
2
+
3
+ <img src="s_maw.svg" alt="maw" width="400">
4
+
5
+ **Crawl any site. Search it forever.**
6
+
7
+ [![npm](https://img.shields.io/npm/v/@memvid/maw)](https://www.npmjs.com/package/@memvid/maw)
8
+ [![downloads](https://img.shields.io/npm/dm/@memvid/maw)](https://www.npmjs.com/package/@memvid/maw)
9
+ [![license](https://img.shields.io/npm/l/@memvid/maw)](LICENSE)
10
+
11
+ [Install](#install) · [Commands](#commands) · [Examples](#examples) · [FAQ](#faq)
12
+
13
+ </div>
14
+
15
+ ---
16
+
17
+ Feed the maw. It never forgets.
18
+
19
+ ```bash
20
+ npx @memvid/maw https://stripe.com/docs
21
+ ```
22
+
23
+ That's it. The entire Stripe docs are now in a 40MB file you can search and ask questions to. Offline. Forever.
24
+
25
+ ## Why?
26
+
27
+ Because you shouldn't need to keep 47 browser tabs open or bookmark links you'll never read again. Crawl once, query forever.
28
+
29
+ ```bash
30
+ # later, when you actually need it
31
+ maw ask stripe.mv2 "how do webhooks work?"
32
+ ```
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ npm i -g @memvid/maw
38
+ ```
39
+
40
+ Or just use `npx @memvid/maw` without installing.
41
+
42
+ ## What it does
43
+
44
+ ```
45
+ maw https://react.dev → react.mv2 (312 pages, 18s)
46
+ maw https://docs.python.org → python.mv2 (2,847 pages, 4min)
47
+ maw . → repo.mv2 (your local git repo)
48
+ maw https://news.ycombinator.com/item?id=12345 → just that page
49
+ ```
50
+
51
+ Smart defaults:
52
+ - **Single page URL?** Fetches just that page
53
+ - **Domain root?** Crawls the whole site
54
+ - **Local path?** Reads your git repo
55
+ - **Protected site?** Auto-switches to stealth browser
56
+
57
+ ## Commands
58
+
59
+ ### Crawl
60
+
61
+ ```bash
62
+ maw <url> # → maw.mv2
63
+ maw <url> -o docs.mv2 # custom output
64
+ maw <url> docs.mv2 # same thing (appends if exists)
65
+ maw <url> --depth 5 --max-pages 500 # go deeper
66
+ ```
67
+
68
+ ### Query
69
+
70
+ ```bash
71
+ maw find docs.mv2 "authentication" # full-text search
72
+ maw ask docs.mv2 "how does X work?" # AI answer (needs OPENAI_API_KEY)
73
+ maw list docs.mv2 # see what's inside
74
+ ```
75
+
76
+ ### Preview
77
+
78
+ ```bash
79
+ maw preview stripe.com # shows sitemap, estimated page count
80
+ ```
81
+
82
+ ### Export
83
+
84
+ ```bash
85
+ maw export docs.mv2 -f markdown --out docs.md
86
+ maw export docs.mv2 -f json --out docs.json
87
+ ```
88
+
89
+ ## Embeddings
90
+
91
+ Want semantic search? Add `--embed`:
92
+
93
+ ```bash
94
+ maw https://docs.whatever.com --embed openai
95
+ ```
96
+
97
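+ With `--embed openai`, maw uses OpenAI embeddings for semantic search. Costs ~$0.01 per 1000 pages. Passing `--embed` with no model name uses the default `bge-small` model; `nvidia` is also accepted. Without `--embed`, you get BM25 keyword search (still good, just different).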
98
+
99
+ ## How it works
100
+
101
+ Most sites work with a simple fetch. When that fails (Cloudflare, JS-heavy SPAs), maw falls back to a real browser. When *that* fails (aggressive anti-bot), it uses stealth mode.
102
+
103
+ ```
104
+ fetch (fast) → playwright (slower) → rebrowser (stealth)
105
+ ↓ ↓ ↓
106
+ works? blocked? blocked?
107
+ ↓ ↓ ↓
108
+ done retry done
109
+ ```
110
+
111
+ 90% of sites never need the browser. The 10% that do, just work.
112
+
113
+ ## Options
114
+
115
+ | Flag | Description | Default |
116
+ |------|-------------|---------|
117
+ | `-o, --output <file>` | Output file | `maw.mv2` |
118
+ | `-d, --depth <n>` | Crawl depth | auto (`0` for single pages, `2` for domains) |
119
+ | `-m, --max-pages <n>` | Max pages to crawl | `150` |
120
+ | `-c, --concurrency <n>` | Parallel requests | `10` |
121
+ | `-r, --rate-limit <n>` | Requests per second | `10` |
122
+ | `--include <regex>` | Only crawl matching URLs | - |
123
+ | `--exclude <regex>` | Skip matching URLs | - |
124
+ | `--browser` | Force browser mode | - |
125
+ | `--stealth` | Force stealth mode | - |
126
+ | `--embed [model]` | Enable embeddings | - |
127
+ | `--no-robots` | Ignore robots.txt | - |
128
+ | `--no-sitemap` | Skip sitemap discovery | - |
129
+
130
+ ## Examples
131
+
132
+ ```bash
133
+ # documentation sites
134
+ maw https://react.dev
135
+ maw https://docs.python.org
136
+ maw https://stripe.com/docs
137
+
138
+ # news/blogs
139
+ maw https://paulgraham.com/articles.html
140
+ maw "https://news.ycombinator.com/item?id=40000000"
141
+
142
+ # your own repos
143
+ maw . -o my-project.mv2
144
+ maw https://github.com/user/repo
145
+
146
+ # combine multiple sources
147
+ maw https://react.dev https://nextjs.org -o frontend.mv2
148
+
149
+ # deep crawl with embeddings
150
+ maw https://kubernetes.io/docs --depth 4 --max-pages 1000 --embed openai
151
+ ```
152
+
153
+ ## Limits
154
+
155
+ Files up to **50MB** work without any API key. That's roughly 500-2000 pages depending on content.
156
+
157
+ For bigger crawls, get a key at [memvid.com](https://memvid.com).
158
+
159
+ ## FAQ
160
+
161
+ **Is this legal?**
162
+
163
+ Respects robots.txt by default. Use `--no-robots` at your own discretion.
164
+
165
+ **Why .mv2?**
166
+
167
+ It's a [memvid](https://memvid.com) file — single-file database with full-text search, embeddings, and temporal queries baked in. Think SQLite but for documents.
168
+
169
+ **Can I use it programmatically?**
170
+
171
+ ```javascript
172
+ import { crawl, query } from '@memvid/maw'
173
+
174
+ await crawl('https://example.com', { output: 'site.mv2' })
175
+ const results = await query('site.mv2', 'search term')
176
+ ```
177
+
178
+ **What about rate limiting?**
179
+
180
+ Default is 10 req/sec with automatic backoff. Most sites won't notice you. If you're hitting APIs, consider `--rate-limit 2`.
181
+
182
+ **Does it handle JavaScript-rendered content?**
183
+
184
+ Yes. If fetch fails, it automatically tries Playwright. For heavily protected sites, use `--stealth`.
185
+
186
+ ---
187
+
188
+ [MIT License](LICENSE) · Built on [memvid](https://memvid.com)
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * maw CLI - Feed the maw. It never forgets.
4
+ */
5
+ export {};
6
+ //# sourceMappingURL=maw.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"maw.d.ts","sourceRoot":"","sources":["../../bin/maw.ts"],"names":[],"mappings":";AAEA;;GAEG"}
@@ -0,0 +1,275 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * maw CLI - Feed the maw. It never forgets.
4
+ */
5
+ import { existsSync } from 'fs';
6
+ import { Command } from 'commander';
7
+ import { maw, find, ask, list, preview, exportDocs } from '../src/index.js';
8
+ import { setLogMode } from '../src/utils/logger.js';
9
+ import * as ui from '../src/utils/ui.js';
10
// Version string reported by `maw --version`.
const VERSION = '1.0.0';

// Root commander instance; the default command and every subcommand below
// are registered on it.
const program = new Command();
program.name('maw');
program.description('Feed the maw. It never forgets.');
program.version(VERSION);
16
// Main command: maw <urls...> [file.mv2]
//
// Positional arguments ending in `.mv2` are treated as the output target;
// every other positional argument is handed to `maw()` as a source.
// Numeric flags are validated before the crawl starts, so a typo such as
// `--concurrency abc` produces a clear error instead of passing NaN on.
program
    .argument('[urls...]', 'URLs/repos to consume, optionally followed by target.mv2 to append')
    .option('-o, --output <file>', 'Output .mv2 file', 'maw.mv2')
    .option('-d, --depth <n>', 'Crawl depth (auto: 0 for pages, 2 for domains)')
    .option('-c, --concurrency <n>', 'Concurrent requests', '10')
    .option('-m, --max-pages <n>', 'Maximum pages to crawl (default: 150)')
    .option('-r, --rate-limit <n>', 'Requests per second', '10')
    .option('-t, --timeout <ms>', 'Request timeout in ms', '10000')
    .option('--include <pattern>', 'URL pattern to include (regex)')
    .option('--exclude <pattern>', 'URL pattern to exclude (regex)')
    .option('--label <label>', 'Label for ingested documents', 'web')
    .option('--memory <id>', 'Cloud memory ID to bind to (from memvid.com/dashboard)')
    .option('--sitemap', 'Use sitemap.xml for discovery (default: true)')
    .option('--no-sitemap', 'Disable sitemap discovery')
    .option('--no-robots', 'Ignore robots.txt')
    .option('--browser', 'Force browser mode (for JavaScript-heavy sites)')
    .option('--stealth', 'Force stealth mode (bypasses anti-bot)')
    .option('--embed [model]', 'Enable semantic embeddings (models: bge-small, openai, nvidia)')
    .option('-q, --quiet', 'Minimal output')
    .option('-v, --verbose', 'Verbose output')
    .action(async (urls, options) => {
    // Parse an integer flag value; throws when parseInt cannot read a number
    // so the catch below reports it instead of NaN reaching maw().
    const toInt = (raw, flag) => {
        const value = parseInt(raw, 10);
        if (Number.isNaN(value)) {
            throw new Error(`Invalid value for ${flag}: "${raw}" (expected an integer)`);
        }
        return value;
    };
    if (urls.length === 0) {
        // Show banner and help
        console.log(ui.banner());
        program.help();
        return;
    }
    setLogMode(options.quiet, options.verbose);
    // Check if any argument is an .mv2 file (use as output target for appending)
    // e.g., `maw https://example.com knowledge.mv2` or `maw knowledge.mv2 https://example.com`
    const mv2Files = urls.filter((u) => u.endsWith('.mv2'));
    const sources = urls.filter((u) => !u.endsWith('.mv2'));
    // Determine output file: explicit -o flag > .mv2 in args > default.
    // "Explicit" is detected by comparing against the option's default value.
    let outputFile = options.output;
    if (mv2Files.length > 0 && options.output === 'maw.mv2') {
        // Use the .mv2 file from args if no explicit -o was given
        outputFile = mv2Files[0];
        if (mv2Files.length > 1) {
            console.error(ui.errorMessage('Only one .mv2 file can be specified as target'));
            process.exit(1);
        }
    }
    if (sources.length === 0) {
        console.error(ui.errorMessage('No URLs or sources provided'));
        process.exit(1);
    }
    // Show header - detect git repos vs URLs (affects the header label only)
    if (!options.quiet) {
        const isGit = sources.some((u) => u.startsWith('https://github.com/') ||
            u.startsWith('https://gitlab.com/') ||
            u.includes('.git') ||
            u.startsWith('.') ||
            u.startsWith('/'));
        const label = isGit ? 'maw (git)' : 'maw';
        const urlDisplay = sources.length === 1 ? sources[0] : `${sources.length} sources`;
        console.log(ui.header(label, urlDisplay));
        // Show if appending to existing file
        if (existsSync(outputFile)) {
            console.log(ui.theme.info(` → Adding to ${outputFile}\n`));
        }
        // Show embedding mode if enabled
        if (options.embed) {
            const model = typeof options.embed === 'string' ? options.embed : 'bge-small';
            console.log(ui.theme.info(` Semantic embeddings enabled (${model})`));
            console.log(ui.theme.dim(' This improves search quality but takes longer.\n'));
        }
    }
    try {
        const result = await maw(sources, {
            output: outputFile,
            // undefined triggers auto-detect
            depth: options.depth ? toInt(options.depth, '--depth') : undefined,
            concurrency: toInt(options.concurrency, '--concurrency'),
            maxPages: options.maxPages ? toInt(options.maxPages, '--max-pages') : undefined,
            rateLimit: toInt(options.rateLimit, '--rate-limit'),
            timeout: toInt(options.timeout, '--timeout'),
            // An invalid pattern makes RegExp throw; the catch below reports it.
            includePattern: options.include ? new RegExp(options.include) : undefined,
            excludePattern: options.exclude ? new RegExp(options.exclude) : undefined,
            label: options.label,
            memoryId: options.memory,
            useSitemap: options.sitemap,
            respectRobots: options.robots,
            // --stealth takes precedence over --browser when both are given.
            forceEngine: options.stealth ? 'rebrowser' : options.browser ? 'playwright' : undefined,
            enableEmbedding: !!options.embed,
            embeddingModel: typeof options.embed === 'string' ? options.embed : 'bge-small',
            quiet: options.quiet,
            verbose: options.verbose,
        });
        // Success output
        console.log(ui.successMessage(result.output, result.size, result.pages, result.duration));
        // Show dedup stats if any skipped
        const dedupStats = result.stats.dedup;
        if (dedupStats && (dedupStats.localeSkipped > 0 || dedupStats.similarSkipped > 0)) {
            console.log(ui.dedupStats(dedupStats));
        }
        // Show engine stats in verbose mode
        if (options.verbose) {
            console.log(ui.engineStats(result.stats));
        }
        // Warnings and cloud sync status
        if (result.stoppedAtLimit) {
            console.log(ui.limitWarning());
        }
        else if (result.memoryId) {
            console.log(ui.cloudSyncMessage(result.memoryId));
        }
        else if (!options.quiet) {
            console.log(ui.theme.dim(' It will never forget.'));
        }
        console.log();
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.error(ui.errorMessage(message));
        process.exit(1);
    }
});
132
// find command: maw find <file> <query>
//
// Runs `find()` against the given .mv2 file and prints either the raw result
// as JSON (--json) or the formatted hit list.
program
    .command('find <file> <query>')
    .description('Search in an .mv2 file')
    .option('-k, --top <n>', 'Number of results (default: 10)', '10')
    .option('--json', 'Output as JSON')
    .action(async (file, query, options) => {
    try {
        // Reject non-numeric --top instead of passing NaN to find().
        const k = parseInt(options.top, 10);
        if (Number.isNaN(k)) {
            throw new Error(`Invalid value for --top: "${options.top}" (expected an integer)`);
        }
        const results = await find(file, query, { k });
        if (options.json) {
            console.log(JSON.stringify(results, null, 2));
            return;
        }
        console.log();
        console.log(ui.searchResults(results.hits || []));
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.error(ui.errorMessage(message));
        process.exit(1);
    }
});
153
// ask command: maw ask <file> <question>
//
// Calls `ask()` with the chosen model and API key (flag first, then the
// OPENAI_API_KEY environment variable) and prints the answer with its sources.
program
    .command('ask <file> <question>')
    .description('Ask a question using an .mv2 file')
    .option('--model <model>', 'LLM model to use (default: gpt-4o-mini)', 'gpt-4o-mini')
    .option('--api-key <key>', 'API key for the model')
    .option('-k, --context <n>', 'Number of context chunks to retrieve (auto: 15 for overview questions, 8 otherwise)')
    .option('--json', 'Output as JSON')
    .action(async (file, question, options) => {
    try {
        // When --context is omitted, k stays undefined and ask() picks its default.
        let k;
        if (options.context) {
            k = parseInt(options.context, 10);
            if (Number.isNaN(k)) {
                throw new Error(`Invalid value for --context: "${options.context}" (expected an integer)`);
            }
        }
        const result = await ask(file, question, {
            model: options.model,
            apiKey: options.apiKey || process.env.OPENAI_API_KEY,
            k,
        });
        if (options.json) {
            console.log(JSON.stringify(result, null, 2));
            return;
        }
        console.log(ui.askResult(result.answer, result.sources));
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.error(ui.errorMessage(message));
        process.exit(1);
    }
});
179
// list command: maw list <file>
//
// Prints the documents returned by `list()`. The result may expose its items
// as `hits`, as `frames`, or be the array itself, so all three are tried.
program
    .command('list <file>')
    .description('List documents in an .mv2 file')
    .option('-l, --limit <n>', 'Number of documents to show (default: 20)', '20')
    .option('--json', 'Output as JSON')
    .action(async (file, options) => {
    try {
        // Reject non-numeric --limit instead of passing NaN to list().
        const limit = parseInt(options.limit, 10);
        if (Number.isNaN(limit)) {
            throw new Error(`Invalid value for --limit: "${options.limit}" (expected an integer)`);
        }
        const results = await list(file, { limit });
        if (options.json) {
            console.log(JSON.stringify(results, null, 2));
            return;
        }
        const items = results.hits || results.frames || results;
        if (Array.isArray(items) && items.length > 0) {
            console.log(ui.listDocuments(items.map((item) => ({
                // Title falls back to the first 60 preview characters, then the frame id.
                title: item.title || item.preview?.slice(0, 60) || `Frame ${item.frame_id}`,
                url: item.metadata?.url || item.uri,
                preview: item.preview,
            }))));
        }
        else {
            console.log(`\n ${ui.theme.muted('No documents found.')}\n`);
        }
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.error(ui.errorMessage(message));
        process.exit(1);
    }
});
209
// preview command: maw preview <url> (or np)
//
// Calls `preview()` for the URL and prints the result, as JSON with --json
// or through the formatted preview view otherwise.
program
    .command('preview <url>')
    .alias('np')
    .description('Preview available pages on a site (sitemap discovery)')
    .option('-l, --limit <n>', 'Number of pages to show', '20')
    .option('--json', 'Output as JSON')
    .action(async (url, options) => {
    try {
        // Reject non-numeric --limit instead of passing NaN to preview().
        const limit = parseInt(options.limit, 10);
        if (Number.isNaN(limit)) {
            throw new Error(`Invalid value for --limit: "${options.limit}" (expected an integer)`);
        }
        const result = await preview(url, { limit });
        if (options.json) {
            console.log(JSON.stringify(result, null, 2));
            return;
        }
        console.log(ui.previewResults(result));
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.error(ui.errorMessage(message));
        process.exit(1);
    }
});
230
// export command: maw export <file>
//
// Loads up to 10000 documents via `exportDocs()` and serialises them as JSON,
// markdown, or CSV (title and uri columns only). Output goes to --out when
// given, otherwise to stdout. An unrecognised --format is reported as an error
// rather than silently producing JSON.
program
    .command('export <file>')
    .description('Export .mv2 file to other formats')
    .option('-f, --format <format>', 'Output format: json, markdown, csv', 'json')
    .option('--out <file>', 'Output file (default: stdout)')
    .action(async (file, options) => {
    // Quote one CSV field, doubling embedded quotes; missing values become "".
    const csvField = (value) => `"${(value || '').replace(/"/g, '""')}"`;
    try {
        // Get full content for all documents
        const docs = await exportDocs(file, { limit: 10000 });
        let output;
        switch (options.format) {
            case 'markdown':
                // Fall back for missing fields so the output never contains "undefined".
                output = docs
                    .map((doc) => `# ${doc.title || doc.uri || 'Untitled'}\n\n${doc.content ?? ''}\n\n---\n`)
                    .join('\n');
                break;
            case 'csv': {
                const headers = ['title', 'uri'];
                const rows = docs.map((doc) => [csvField(doc.title), csvField(doc.uri)].join(','));
                output = [headers.join(','), ...rows].join('\n');
                break;
            }
            case 'json':
                output = JSON.stringify(docs, null, 2);
                break;
            default:
                throw new Error(`Unknown export format "${options.format}" (expected json, markdown, or csv)`);
        }
        if (options.out) {
            const { writeFileSync } = await import('fs');
            writeFileSync(options.out, output);
            console.log(ui.theme.success(`\n Exported ${docs.length} documents to ${options.out}\n`));
        }
        else {
            console.log(output);
        }
    }
    catch (error) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        console.error(ui.errorMessage(message));
        process.exit(1);
    }
});
274
// Every action handler above is async, so parse with parseAsync() and report
// any rejection that escapes a handler's own try/catch (for example an error
// thrown before the handler enters its try block).
program.parseAsync().catch((error) => {
    const message = error instanceof Error ? error.message : String(error);
    console.error(ui.errorMessage(message));
    process.exit(1);
});
275
+ //# sourceMappingURL=maw.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"maw.js","sourceRoot":"","sources":["../../bin/maw.ts"],"names":[],"mappings":";AAEA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC5E,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACpD,OAAO,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAEzC,MAAM,OAAO,GAAG,OAAO,CAAC;AAExB,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,KAAK,CAAC;KACX,WAAW,CAAC,iCAAiC,CAAC;KAC9C,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,yCAAyC;AACzC,OAAO;KACJ,QAAQ,CAAC,WAAW,EAAE,oEAAoE,CAAC;KAC3F,MAAM,CAAC,qBAAqB,EAAE,kBAAkB,EAAE,SAAS,CAAC;KAC5D,MAAM,CAAC,iBAAiB,EAAE,gDAAgD,CAAC;KAC3E,MAAM,CAAC,uBAAuB,EAAE,qBAAqB,EAAE,IAAI,CAAC;KAC5D,MAAM,CAAC,qBAAqB,EAAE,uCAAuC,CAAC;KACtE,MAAM,CAAC,sBAAsB,EAAE,qBAAqB,EAAE,IAAI,CAAC;KAC3D,MAAM,CAAC,oBAAoB,EAAE,uBAAuB,EAAE,OAAO,CAAC;KAC9D,MAAM,CAAC,qBAAqB,EAAE,gCAAgC,CAAC;KAC/D,MAAM,CAAC,qBAAqB,EAAE,gCAAgC,CAAC;KAC/D,MAAM,CAAC,iBAAiB,EAAE,8BAA8B,EAAE,KAAK,CAAC;KAChE,MAAM,CAAC,eAAe,EAAE,wDAAwD,CAAC;KACjF,MAAM,CAAC,WAAW,EAAE,+CAA+C,CAAC;KACpE,MAAM,CAAC,cAAc,EAAE,2BAA2B,CAAC;KACnD,MAAM,CAAC,aAAa,EAAE,mBAAmB,CAAC;KAC1C,MAAM,CAAC,WAAW,EAAE,iDAAiD,CAAC;KACtE,MAAM,CAAC,WAAW,EAAE,wCAAwC,CAAC;KAC7D,MAAM,CAAC,iBAAiB,EAAE,gEAAgE,CAAC;KAC3F,MAAM,CAAC,aAAa,EAAE,gBAAgB,CAAC;KACvC,MAAM,CAAC,eAAe,EAAE,gBAAgB,CAAC;KACzC,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;IAC9B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,uBAAuB;QACvB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;QACzB,OAAO,CAAC,IAAI,EAAE,CAAC;QACf,OAAO;IACT,CAAC;IAED,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IAE3C,6EAA6E;IAC7E,2FAA2F;IAC3F,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;IAChE,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;IAEhE,mEAAmE;IACnE,IAAI,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAChC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;Q
ACxD,0DAA0D;QAC1D,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,+CAA+C,CAAC,CAAC,CAAC;YAChF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,6BAA6B,CAAC,CAAC,CAAC;QAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,yCAAyC;IACzC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QACnB,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAS,EAAE,EAAE,CACvC,CAAC,CAAC,UAAU,CAAC,qBAAqB,CAAC;YACnC,CAAC,CAAC,UAAU,CAAC,qBAAqB,CAAC;YACnC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC;YAClB,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC;YACjB,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAClB,CAAC;QACF,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1C,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,UAAU,CAAC;QACnF,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC;QAE1C,qCAAqC;QACrC,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,iBAAiB,UAAU,IAAI,CAAC,CAAC,CAAC;QAC9D,CAAC;QAED,iCAAiC;QACjC,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,MAAM,KAAK,GAAG,OAAO,OAAO,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC;YAC9E,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,kCAAkC,KAAK,GAAG,CAAC,CAAC,CAAC;YACvE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC,CAAC;QAClF,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,OAAO,EAAE;YAChC,MAAM,EAAE,UAAU;YAClB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,EAAG,iCAAiC;YAClG,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC;YAC9C,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YACvE,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;YAC1C,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;YACtC,cAAc,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,MAAM,CA
AC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS;YACzE,cAAc,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS;YACzE,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,QAAQ,EAAE,OAAO,CAAC,MAAM;YACxB,UAAU,EAAE,OAAO,CAAC,OAAO;YAC3B,aAAa,EAAE,OAAO,CAAC,MAAM;YAC7B,WAAW,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS;YACvF,eAAe,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK;YAChC,cAAc,EAAE,OAAO,OAAO,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW;YAC/E,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,OAAO,EAAE,OAAO,CAAC,OAAO;SACzB,CAAC,CAAC;QAEH,iBAAiB;QACjB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;QAE1F,kCAAkC;QAClC,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC;QACtC,IAAI,UAAU,IAAI,CAAC,UAAU,CAAC,aAAa,GAAG,CAAC,IAAI,UAAU,CAAC,cAAc,GAAG,CAAC,CAAC,EAAE,CAAC;YAClF,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;QACzC,CAAC;QAED,oCAAoC;QACpC,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;YACpB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAC5C,CAAC;QAED,iCAAiC;QACjC,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;YAC1B,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,CAAC;QACjC,CAAC;aAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,gBAAgB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;QACpD,CAAC;aAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YAC1B,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC,CAAC;QACvD,CAAC;QAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,wCAAwC;AACxC,OAAO;KACJ,OAAO,CAAC,qBAAqB,CAAC;KAC9B,WAAW,CAAC,wBAAwB,CAAC;KACrC,MAAM,CAAC,eAAe,EAAE,iCAAiC,EAAE,IAAI,CAAC;KAChE,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IACrC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,KAAK,EAA
E,EAAE,CAAC,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;QAE1E,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC9C,OAAO;QACT,CAAC;QAED,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC;IACpD,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,yCAAyC;AACzC,OAAO;KACJ,OAAO,CAAC,uBAAuB,CAAC;KAChC,WAAW,CAAC,mCAAmC,CAAC;KAChD,MAAM,CAAC,iBAAiB,EAAE,yCAAyC,EAAE,aAAa,CAAC;KACnF,MAAM,CAAC,iBAAiB,EAAE,uBAAuB,CAAC;KAClD,MAAM,CAAC,mBAAmB,EAAE,qFAAqF,CAAC;KAClH,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,EAAE;IACxC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE;YACvC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc;YACpD,CAAC,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,8BAA8B;SAC/F,CAAC,CAAC;QAEH,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO;QACT,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAC3D,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,gCAAgC;AAChC,OAAO;KACJ,OAAO,CAAC,aAAa,CAAC;KACtB,WAAW,CAAC,gCAAgC,CAAC;KAC7C,MAAM,CAAC,iBAAiB,EAAE,2CAA2C,EAAE,IAAI,CAAC;KAC5E,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;IAC9B,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;QAEzE,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAA
C,CAAC;YAC9C,OAAO;QACT,CAAC;QAED,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC;QACxD,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,aAAa,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,CAAC;gBACrD,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,SAAS,IAAI,CAAC,QAAQ,EAAE;gBAC3E,GAAG,EAAE,IAAI,CAAC,QAAQ,EAAE,GAAG,IAAI,IAAI,CAAC,GAAG;gBACnC,OAAO,EAAE,IAAI,CAAC,OAAO;aACtB,CAAC,CAAC,CAAC,CAAC,CAAC;QACR,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,qBAAqB,CAAC,IAAI,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,6CAA6C;AAC7C,OAAO;KACJ,OAAO,CAAC,eAAe,CAAC;KACxB,KAAK,CAAC,IAAI,CAAC;KACX,WAAW,CAAC,uDAAuD,CAAC;KACpE,MAAM,CAAC,iBAAiB,EAAE,yBAAyB,EAAE,IAAI,CAAC;KAC1D,MAAM,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAClC,MAAM,CAAC,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;IAC7B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;QAE1E,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO;QACT,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,oCAAoC;AACpC,OAAO;KACJ,OAAO,CAAC,eAAe,CAAC;KACxB,WAAW,CAAC,mCAAmC,CAAC;KAChD,MAAM,CAAC,uBAAuB,EAAE,oCAAoC,EAAE,MAAM,CAAC;KAC7E,MAAM,CAAC,cAAc,EAAE,+BAA+B,CAAC;KACvD,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;IAC9B,IAAI,CAAC;QACH,qCAAqC;QACrC,MAAM,IAAI,GAAG,MAAM,UAAU,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;QAEtD,IAAI,MAAc,CAAC;QAEnB,QAAQ,OAAO,CAAC,MAAM,EAAE,CAAC;YACvB,KAAK,UAAU;gBACb,MAAM,GAAG,IAAI,C
AAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACxB,OAAO,KAAK,GAAG,CAAC,KAAK,OAAO,GAAG,CAAC,OAAO,WAAW,CAAC;gBACrD,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACd,MAAM;YAER,KAAK,KAAK;gBACR,MAAM,OAAO,GAAG,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;gBACjC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBAC5B,OAAO;wBACL,IAAI,CAAC,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG;wBAC5C,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG;qBAC3C,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACd,CAAC,CAAC,CAAC;gBACH,MAAM,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjD,MAAM;YAER;gBACE,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;YAC7C,aAAa,CAAC,OAAO,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,gBAAgB,IAAI,CAAC,MAAM,iBAAiB,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC;QAC7F,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Web crawler with rate limiting
3
+ */
4
+ import { type ExtractResult } from '../extractor/index.js';
5
/**
 * Settings for a crawl. The `Crawler` constructor accepts a
 * `Partial<CrawlOptions>`, so callers may supply any subset.
 *
 * Field descriptions follow the wording of the matching CLI flags.
 */
export interface CrawlOptions {
    /** Crawl depth. */
    depth: number;
    /** Number of concurrent requests. */
    concurrency: number;
    /** Maximum number of pages to crawl. */
    maxPages: number;
    /** Requests per second. */
    rateLimit: number;
    /** Request timeout in milliseconds. */
    timeout: number;
    /** Whether robots.txt is respected. */
    respectRobots: boolean;
    /** Whether sitemap.xml is used for discovery. */
    useSitemap: boolean;
    /** URL pattern to include. */
    includePattern?: RegExp;
    /** URL pattern to exclude. */
    excludePattern?: RegExp;
    /** Forces a specific engine instead of leaving the choice to the crawler. */
    forceEngine?: 'fetch' | 'playwright' | 'rebrowser';
}
17
/**
 * One item yielded by `Crawler.crawl()`.
 */
export interface CrawlResult {
    /** URL associated with this result. */
    url: string;
    /** Final URL for the page (presumably after redirects; confirm against the crawler source). */
    finalUrl: string;
    /** Output of the extractor for this page. */
    extracted: ExtractResult;
    /** Depth associated with this page. */
    depth: number;
    /** Name of the engine recorded for this page. */
    engine: string;
}
24
/**
 * Web crawler with rate limiting.
 *
 * Construct with any subset of {@link CrawlOptions}, iterate `crawl()` to
 * receive results, and call `close()` when finished.
 */
export declare class Crawler {
    private options;
    private engine;
    private extractor;
    private robots;
    private sitemap;
    private dedup;
    private visited;
    private queue;
    private baseHosts;
    private results;
    private pending;
    /**
     * @param options Partial crawl settings; omitted fields are left to the implementation.
     */
    constructor(options?: Partial<CrawlOptions>);
    /**
     * Crawls the given start URLs, yielding each result as it completes.
     *
     * @param startUrls URLs to begin crawling from.
     * @returns An async generator of {@link CrawlResult} items.
     */
    crawl(startUrls: string[]): AsyncGenerator<CrawlResult>;
    private parseSitemaps;
    private addToQueue;
    private processUrl;
    private shouldCrawl;
    /**
     * Returns crawl statistics: visited/queued/pending counts, dedup counters,
     * and per-engine counters including blocked requests.
     */
    getStats(): {
        visited: number;
        queued: number;
        pending: number;
        dedup: {
            uniquePaths: number;
            uniqueContent: number;
            localeSkipped: number;
            similarSkipped: number;
            total: number;
        };
        fetch: number;
        playwright: number;
        rebrowser: number;
        blocked: number;
    };
    /**
     * Closes all resources held by the crawler.
     */
    close(): Promise<void>;
}
69
+ export { RobotsParser } from './robots.js';
70
+ export { SitemapParser } from './sitemap.js';
71
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/crawler/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAa,KAAK,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAOtE,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,OAAO,CAAC;IACvB,UAAU,EAAE,OAAO,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,GAAG,YAAY,GAAG,WAAW,CAAC;CACpD;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,aAAa,CAAC;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAcD,qBAAa,OAAO;IAClB,OAAO,CAAC,OAAO,CAAe;IAC9B,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,KAAK,CAAe;IAC5B,OAAO,CAAC,OAAO,CAA0B;IACzC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,SAAS,CAA0B;IAC3C,OAAO,CAAC,OAAO,CAAqB;IACpC,OAAO,CAAC,OAAO,CAA0D;gBAE7D,OAAO,GAAE,OAAO,CAAC,YAAY,CAAM;IAgB/C;;OAEG;IACI,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,cAAc,CAAC,WAAW,CAAC;YAkEhD,aAAa;IA2B3B,OAAO,CAAC,UAAU;YAaJ,UAAU;IA+CxB,OAAO,CAAC,WAAW;IAyCnB;;OAEG;IACH,QAAQ;;;;;;;;;;;;;;;;IAUR;;OAEG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAI7B;AAED,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}