webcontext-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +583 -0
  3. package/dist/browser/manager.d.ts +47 -0
  4. package/dist/browser/manager.d.ts.map +1 -0
  5. package/dist/browser/manager.js +215 -0
  6. package/dist/browser/manager.js.map +1 -0
  7. package/dist/cache/cache.d.ts +22 -0
  8. package/dist/cache/cache.d.ts.map +1 -0
  9. package/dist/cache/cache.js +150 -0
  10. package/dist/cache/cache.js.map +1 -0
  11. package/dist/chunking/chunker.d.ts +26 -0
  12. package/dist/chunking/chunker.d.ts.map +1 -0
  13. package/dist/chunking/chunker.js +208 -0
  14. package/dist/chunking/chunker.js.map +1 -0
  15. package/dist/cli/index.d.ts +3 -0
  16. package/dist/cli/index.d.ts.map +1 -0
  17. package/dist/cli/index.js +406 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/core/pipeline.d.ts +35 -0
  20. package/dist/core/pipeline.d.ts.map +1 -0
  21. package/dist/core/pipeline.js +476 -0
  22. package/dist/core/pipeline.js.map +1 -0
  23. package/dist/core/stream.d.ts +48 -0
  24. package/dist/core/stream.d.ts.map +1 -0
  25. package/dist/core/stream.js +72 -0
  26. package/dist/core/stream.js.map +1 -0
  27. package/dist/core/types.d.ts +259 -0
  28. package/dist/core/types.d.ts.map +1 -0
  29. package/dist/core/types.js +4 -0
  30. package/dist/core/types.js.map +1 -0
  31. package/dist/export/index.d.ts +3 -0
  32. package/dist/export/index.d.ts.map +1 -0
  33. package/dist/export/index.js +8 -0
  34. package/dist/export/index.js.map +1 -0
  35. package/dist/export/templates.d.ts +25 -0
  36. package/dist/export/templates.d.ts.map +1 -0
  37. package/dist/export/templates.js +76 -0
  38. package/dist/export/templates.js.map +1 -0
  39. package/dist/export/vectordb.d.ts +21 -0
  40. package/dist/export/vectordb.d.ts.map +1 -0
  41. package/dist/export/vectordb.js +101 -0
  42. package/dist/export/vectordb.js.map +1 -0
  43. package/dist/extractors/content.d.ts +23 -0
  44. package/dist/extractors/content.d.ts.map +1 -0
  45. package/dist/extractors/content.js +328 -0
  46. package/dist/extractors/content.js.map +1 -0
  47. package/dist/extractors/github.d.ts +19 -0
  48. package/dist/extractors/github.d.ts.map +1 -0
  49. package/dist/extractors/github.js +150 -0
  50. package/dist/extractors/github.js.map +1 -0
  51. package/dist/extractors/images.d.ts +20 -0
  52. package/dist/extractors/images.d.ts.map +1 -0
  53. package/dist/extractors/images.js +73 -0
  54. package/dist/extractors/images.js.map +1 -0
  55. package/dist/extractors/pdf.d.ts +11 -0
  56. package/dist/extractors/pdf.d.ts.map +1 -0
  57. package/dist/extractors/pdf.js +107 -0
  58. package/dist/extractors/pdf.js.map +1 -0
  59. package/dist/extractors/screenshot.d.ts +21 -0
  60. package/dist/extractors/screenshot.d.ts.map +1 -0
  61. package/dist/extractors/screenshot.js +85 -0
  62. package/dist/extractors/screenshot.js.map +1 -0
  63. package/dist/index.d.ts +70 -0
  64. package/dist/index.d.ts.map +1 -0
  65. package/dist/index.js +206 -0
  66. package/dist/index.js.map +1 -0
  67. package/dist/mcp-server.d.ts +3 -0
  68. package/dist/mcp-server.d.ts.map +1 -0
  69. package/dist/mcp-server.js +108 -0
  70. package/dist/mcp-server.js.map +1 -0
  71. package/dist/sdk/client.d.ts +48 -0
  72. package/dist/sdk/client.d.ts.map +1 -0
  73. package/dist/sdk/client.js +120 -0
  74. package/dist/sdk/client.js.map +1 -0
  75. package/dist/sdk/mcp.d.ts +12 -0
  76. package/dist/sdk/mcp.d.ts.map +1 -0
  77. package/dist/sdk/mcp.js +146 -0
  78. package/dist/sdk/mcp.js.map +1 -0
  79. package/dist/sdk/server.d.ts +5 -0
  80. package/dist/sdk/server.d.ts.map +1 -0
  81. package/dist/sdk/server.js +158 -0
  82. package/dist/sdk/server.js.map +1 -0
  83. package/dist/search/vector.d.ts +26 -0
  84. package/dist/search/vector.d.ts.map +1 -0
  85. package/dist/search/vector.js +142 -0
  86. package/dist/search/vector.js.map +1 -0
  87. package/dist/transformers/markdown.d.ts +21 -0
  88. package/dist/transformers/markdown.d.ts.map +1 -0
  89. package/dist/transformers/markdown.js +242 -0
  90. package/dist/transformers/markdown.js.map +1 -0
  91. package/dist/utils/dedup.d.ts +20 -0
  92. package/dist/utils/dedup.d.ts.map +1 -0
  93. package/dist/utils/dedup.js +61 -0
  94. package/dist/utils/dedup.js.map +1 -0
  95. package/dist/utils/index.d.ts +6 -0
  96. package/dist/utils/index.d.ts.map +1 -0
  97. package/dist/utils/index.js +15 -0
  98. package/dist/utils/index.js.map +1 -0
  99. package/dist/utils/metrics.d.ts +16 -0
  100. package/dist/utils/metrics.d.ts.map +1 -0
  101. package/dist/utils/metrics.js +28 -0
  102. package/dist/utils/metrics.js.map +1 -0
  103. package/dist/utils/scheduler.d.ts +19 -0
  104. package/dist/utils/scheduler.d.ts.map +1 -0
  105. package/dist/utils/scheduler.js +63 -0
  106. package/dist/utils/scheduler.js.map +1 -0
  107. package/dist/utils/sitemap.d.ts +17 -0
  108. package/dist/utils/sitemap.d.ts.map +1 -0
  109. package/dist/utils/sitemap.js +118 -0
  110. package/dist/utils/sitemap.js.map +1 -0
  111. package/dist/utils/validation.d.ts +142 -0
  112. package/dist/utils/validation.d.ts.map +1 -0
  113. package/dist/utils/validation.js +35 -0
  114. package/dist/utils/validation.js.map +1 -0
  115. package/dist/utils/webhook.d.ts +21 -0
  116. package/dist/utils/webhook.d.ts.map +1 -0
  117. package/dist/utils/webhook.js +108 -0
  118. package/dist/utils/webhook.js.map +1 -0
  119. package/package.json +109 -0
@@ -0,0 +1,476 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
25
+ var __importDefault = (this && this.__importDefault) || function (mod) {
26
+ return (mod && mod.__esModule) ? mod : { "default": mod };
27
+ };
28
+ Object.defineProperty(exports, "__esModule", { value: true });
29
+ exports.CrawlPipeline = void 0;
30
+ const manager_1 = require("../browser/manager");
31
+ const content_1 = require("../extractors/content");
32
+ const pdf_1 = require("../extractors/pdf");
33
+ const github_1 = require("../extractors/github");
34
+ const markdown_1 = require("../transformers/markdown");
35
+ const chunker_1 = require("../chunking/chunker");
36
+ const cache_1 = require("../cache/cache");
37
+ const sitemap_1 = require("../utils/sitemap");
38
+ const dedup_1 = require("../utils/dedup");
39
+ const p_queue_1 = __importDefault(require("p-queue"));
40
+ const crypto_1 = require("crypto");
41
+ const cheerio = __importStar(require("cheerio"));
42
+ const fs_1 = require("fs");
43
+ const path_1 = require("path");
44
/**
 * Main crawling pipeline that orchestrates the full extraction workflow:
 * URL → Fetch → Extract → Transform → Chunk → Output
 *
 * One instance owns a browser manager, the extractors, the markdown
 * transformer, the chunker, a cache, a sitemap parser and a deduplicator.
 * Call `crawl()` to run a crawl and `dispose()` to release the chunker and
 * clear the deduplicator.
 */
class CrawlPipeline {
    browser;
    extractor;
    pdfExtractor;
    githubExtractor;
    transformer;
    chunker;
    cache;
    sitemapParser;
    dedup;
    config;

    /**
     * @param {object} [config] Pipeline configuration. Reads `browser`,
     *   `rateLimit`, `extraction.preserveImages`, `chunking`, `cache`,
     *   `concurrency` and `output.format`.
     */
    constructor(config = {}) {
        this.config = config;
        this.browser = new manager_1.BrowserManager(config.browser, config.rateLimit);
        this.extractor = new content_1.ContentExtractor();
        this.pdfExtractor = new pdf_1.PdfExtractor();
        this.githubExtractor = new github_1.GitHubExtractor();
        this.transformer = new markdown_1.MarkdownTransformer({
            preserveImages: config.extraction?.preserveImages,
        });
        this.chunker = new chunker_1.ContentChunker(config.chunking);
        this.cache = new cache_1.CrawlCache({
            enabled: config.cache?.enabled ?? true,
            ttl: config.cache?.ttl ?? 3600,
            maxSize: config.cache?.maxSize ?? 500,
            directory: config.cache?.directory,
            contentHashing: config.cache?.contentHashing ?? true,
        });
        this.sitemapParser = new sitemap_1.SitemapParser();
        this.dedup = new dedup_1.Deduplicator();
    }

    /**
     * Crawl `options.url` (optionally following internal links or a sitemap)
     * and return the extracted pages, the chunked context and run statistics.
     *
     * Per-URL failures are collected in `stats.errors` instead of aborting the
     * crawl. The browser is closed when the crawl finishes, whether it
     * succeeded or threw.
     *
     * @param {object} options Crawl options. Reads `url`, `maxPages` (default
     *   50), `depth` (default 0), `sitemapUrl`, `includePatterns`,
     *   `excludePatterns`, `javascript`, `cache`, `cookies`, `headers`,
     *   `retry`, `respectRobotsTxt`, `focusMode`, `plugins`, `delay`,
     *   `checkpoint`, `checkpointDir` and `onProgress`.
     * @returns {Promise<{pages: object[], context: object, stats: object, diffs?: object[]}>}
     */
    async crawl(options) {
        const startTime = Date.now();
        // PDF and GitHub sources bypass the page queue entirely, so decide
        // before doing any checkpoint or sitemap I/O that would be thrown away.
        if (this.pdfExtractor.isPdf(options.url)) {
            return this.handlePdf(options, startTime);
        }
        if (this.githubExtractor.isGitHubUrl(options.url)) {
            return this.handleGitHub(options, startTime);
        }
        const pages = [];
        const errors = [];
        const diffs = [];
        const visited = new Set();
        const maxPages = options.maxPages ?? 50;
        const depth = options.depth ?? 0;
        let cacheHits = 0;
        let cacheMisses = 0;
        // Nothing in this method increments the retry counter; it is reported as 0.
        const retries = 0;
        const matches = (u) => this.matchesPatterns(u, options.includePatterns, options.excludePatterns);
        const hasRoom = () => pages.length < maxPages;
        // Load checkpoint if resuming
        let checkpoint = null;
        if (options.checkpoint && options.checkpointDir) {
            checkpoint = this.loadCheckpoint(options.checkpointDir);
            if (checkpoint) {
                checkpoint.visitedUrls.forEach(u => visited.add(u));
                pages.push(...checkpoint.pages);
                errors.push(...checkpoint.errors);
            }
        }
        // Discover URLs: sitemap or link-based crawling
        let queue = [];
        if (options.sitemapUrl) {
            // Apply the same include/exclude filter as the auto-discovered sitemap path.
            queue = (await this.parseSitemap(options.sitemapUrl)).filter(matches);
        }
        else if (checkpoint?.pendingUrls?.length) {
            queue = checkpoint.pendingUrls;
        }
        else {
            // Auto-discover sitemap before crawling
            if (depth > 0) {
                const sitemapUrl = await this.sitemapParser.discover(options.url).catch(() => null);
                if (sitemapUrl) {
                    const entries = await this.sitemapParser.parse(sitemapUrl).catch(() => []);
                    if (entries.length > 0) {
                        queue = entries
                            .map(e => e.url)
                            .filter(matches)
                            .slice(0, maxPages);
                    }
                }
            }
            if (!queue.length)
                queue = [options.url];
        }
        try {
            // Initialize browser only if JS rendering is explicitly requested
            if (options.javascript === true) {
                await this.browser.launch();
            }
            const concurrency = this.config.concurrency ?? 3;
            const pQueue = new p_queue_1.default({ concurrency });
            // Queue a page's internal links for the next depth level.
            const enqueueLinks = (links, currentDepth) => {
                if (currentDepth >= depth)
                    return;
                const newUrls = (links ?? [])
                    .filter(l => l.isInternal)
                    .filter(l => matches(l.href))
                    .filter(l => !visited.has(l.href))
                    .map(l => l.href);
                // Clamp at 0: a negative slice end would enqueue almost everything.
                const budget = Math.max(0, maxPages - pages.length);
                for (const newUrl of newUrls.slice(0, budget)) {
                    pQueue.add(() => processUrl(newUrl, currentDepth + 1));
                }
            };
            const processUrl = async (url, currentDepth) => {
                if (visited.has(url) || !hasRoom())
                    return;
                visited.add(url);
                try {
                    // Progress callback (inside the try so a throwing callback is
                    // recorded as an error instead of becoming an unhandled rejection)
                    options.onProgress?.({
                        pagesProcessed: pages.length,
                        totalDiscovered: visited.size + queue.length,
                        currentUrl: url,
                        status: 'crawling',
                    });
                    // Run pre-fetch plugins
                    let ctx = { url };
                    ctx = await this.runPlugins('pre-fetch', ctx, options.plugins);
                    // Check cache first
                    if (options.cache !== false) {
                        const cached = this.cache.get(url);
                        if (cached) {
                            // Another task may have filled the last slot during the await above.
                            if (!hasRoom())
                                return;
                            cacheHits++;
                            pages.push(cached);
                            // Still discover links for recursive crawl
                            enqueueLinks(cached.links, currentDepth);
                            return;
                        }
                        cacheMisses++;
                    }
                    // Fetch page
                    let html;
                    let status;
                    if (options.javascript === true) {
                        const result = await this.browser.fetchPage(url, {
                            respectRobots: options.respectRobotsTxt,
                            cookies: options.cookies,
                            headers: options.headers,
                            retryConfig: options.retry,
                        });
                        html = result.content;
                        status = result.status;
                    }
                    else {
                        const result = await this.browser.fetchStatic(url, {
                            respectRobots: options.respectRobotsTxt,
                            headers: options.headers,
                            retryConfig: options.retry,
                        });
                        html = result.body.toString('utf-8');
                        status = result.status;
                    }
                    if (status >= 400) {
                        errors.push({ url, error: `HTTP ${status}`, statusCode: status });
                        return;
                    }
                    // Run post-fetch plugins
                    ctx = await this.runPlugins('post-fetch', { ...ctx, html }, options.plugins);
                    const finalHtml = ctx.html ?? html;
                    // Run pre-extract plugins
                    ctx = await this.runPlugins('pre-extract', ctx, options.plugins);
                    // Extract content
                    const extracted = this.extractor.extract(finalHtml, url, options.focusMode);
                    // Run post-extract plugins
                    ctx = await this.runPlugins('post-extract', { ...ctx, extracted }, options.plugins);
                    const finalExtracted = ctx.extracted ?? extracted;
                    // Run pre-transform plugins
                    ctx = await this.runPlugins('pre-transform', ctx, options.plugins);
                    // Transform to markdown
                    finalExtracted.markdown = this.transformer.transform(finalExtracted.html || '');
                    // Run post-transform plugins
                    ctx = await this.runPlugins('post-transform', { ...ctx, markdown: finalExtracted.markdown }, options.plugins);
                    if (ctx.markdown)
                        finalExtracted.markdown = ctx.markdown;
                    // Resolve relative links to absolute URLs
                    finalExtracted.markdown = this.resolveLinks(finalExtracted.markdown, url);
                    // Re-check the page limit: concurrent tasks may have filled it while
                    // this one was awaiting. Nothing below awaits before the push.
                    if (!hasRoom())
                        return;
                    // Deduplication check
                    const dupOf = this.dedup.isDuplicate(url, finalExtracted.text);
                    if (dupOf) {
                        return; // Skip duplicate content
                    }
                    // Check content diff
                    if (options.cache !== false && this.config.cache?.contentHashing) {
                        const diff = this.cache.hasChanged(url, finalExtracted.markdown);
                        if (diff.changed)
                            diffs.push(diff);
                    }
                    // Cache result
                    if (options.cache !== false) {
                        this.cache.set(url, finalExtracted);
                    }
                    pages.push(finalExtracted);
                    // Queue internal links for recursive crawl
                    enqueueLinks(finalExtracted.links, currentDepth);
                    // Respect delay
                    if (options.delay) {
                        await new Promise(r => setTimeout(r, options.delay));
                    }
                    // Save checkpoint
                    if (options.checkpoint && options.checkpointDir) {
                        this.saveCheckpoint({
                            visitedUrls: [...visited],
                            pendingUrls: queue.filter(u => !visited.has(u)),
                            pages,
                            errors,
                            timestamp: new Date().toISOString(),
                        }, options.checkpointDir);
                    }
                }
                catch (err) {
                    // Plugins may throw non-Error values; keep the message a string.
                    errors.push({ url, error: err instanceof Error ? err.message : String(err) });
                }
            };
            // Process initial queue
            for (const url of queue) {
                if (!hasRoom())
                    break;
                pQueue.add(() => processUrl(url, 0));
            }
            await pQueue.onIdle();
            // Build chunks from all pages
            const allChunks = [];
            for (const page of pages) {
                // Run pre-chunk plugins
                let ctx = { page };
                ctx = await this.runPlugins('pre-chunk', ctx, options.plugins);
                const chunks = this.chunker.chunk(page.markdown, page.url, page.title, page.headings, this.config.chunking);
                // Run post-chunk plugins
                ctx = await this.runPlugins('post-chunk', { ...ctx, chunks }, options.plugins);
                allChunks.push(...(ctx.chunks ?? chunks));
            }
            const relationships = this.buildRelationships(pages);
            const totalTokens = allChunks.reduce((sum, c) => sum + c.tokens, 0);
            const summary = this.generateSummary(pages);
            const context = {
                id: (0, crypto_1.createHash)('sha256').update(options.url + Date.now()).digest('hex').slice(0, 16),
                source: options.url,
                chunks: allChunks,
                summary,
                totalTokens,
                metadata: {
                    crawledAt: new Date().toISOString(),
                    pageCount: pages.length,
                    contentType: pages[0]?.metadata.type || 'unknown',
                    framework: pages[0]?.metadata.framework,
                    version: pages[0]?.metadata.version,
                    relationships,
                },
                format: this.config.output?.format || 'markdown',
            };
            const stats = {
                pagesProcessed: pages.length,
                totalTokens,
                duration: Date.now() - startTime,
                errors,
                cached: cacheHits,
                cacheHits,
                cacheMisses,
                retries,
            };
            // Notify completion
            options.onProgress?.({
                pagesProcessed: pages.length,
                totalDiscovered: visited.size,
                currentUrl: '',
                status: 'complete',
            });
            return { pages, context, stats, diffs: diffs.length ? diffs : undefined };
        }
        finally {
            // Cleanup browser even when the crawl throws
            // (chunker is reusable, disposed via WebContext.dispose())
            await this.browser.close();
        }
    }

    /**
     * Fetch a sitemap and return the text of every `<url><loc>` entry.
     * Best-effort: any fetch or parse failure yields an empty list.
     *
     * @param {string} sitemapUrl
     * @returns {Promise<string[]>}
     */
    async parseSitemap(sitemapUrl) {
        try {
            const result = await this.browser.fetchStatic(sitemapUrl);
            const xml = result.body.toString('utf-8');
            const $ = cheerio.load(xml, { xmlMode: true });
            return $('url > loc')
                .map((_, el) => $(el).text().trim())
                .get()
                .filter(Boolean); // drop empty <loc/> entries
        }
        catch {
            return [];
        }
    }

    /**
     * Persist crawl state as `<dir>/checkpoint.json`, creating `dir` if needed.
     * The JSON is written to a temporary file and then renamed, so an
     * interrupted write does not leave a truncated checkpoint behind.
     *
     * @param {object} state Serializable crawl state.
     * @param {string} dir Checkpoint directory.
     */
    saveCheckpoint(state, dir) {
        const file = (0, path_1.join)(dir, 'checkpoint.json');
        const tmp = `${file}.tmp`;
        (0, fs_1.mkdirSync)(dir, { recursive: true });
        (0, fs_1.writeFileSync)(tmp, JSON.stringify(state));
        (0, fs_1.renameSync)(tmp, file);
    }

    /**
     * Load `<dir>/checkpoint.json`.
     *
     * @param {string} dir Checkpoint directory.
     * @returns {object|null} The parsed state, or `null` when the file is
     *   missing, unreadable, not valid JSON, or lacks the array fields that
     *   `crawl()` reads (`visitedUrls`, `pages`, `errors`, optional `pendingUrls`).
     */
    loadCheckpoint(dir) {
        const file = (0, path_1.join)(dir, 'checkpoint.json');
        if (!(0, fs_1.existsSync)(file))
            return null;
        try {
            const state = JSON.parse((0, fs_1.readFileSync)(file, 'utf-8'));
            const wellFormed = state !== null
                && typeof state === 'object'
                && Array.isArray(state.visitedUrls)
                && Array.isArray(state.pages)
                && Array.isArray(state.errors)
                && (state.pendingUrls == null || Array.isArray(state.pendingUrls));
            return wellFormed ? state : null;
        }
        catch {
            return null;
        }
    }

    /**
     * Run every plugin's hook for `phase` in order, threading the context
     * through. A hook that returns a falsy value leaves the context unchanged.
     * Plugins without a `hooks` object or without a function for `phase` are
     * skipped.
     *
     * @param {string} phase Hook name, e.g. `'pre-fetch'`.
     * @param {object} ctx Context passed to the first hook.
     * @param {object[]} [plugins]
     * @returns {Promise<object>} The context after the last hook.
     */
    async runPlugins(phase, ctx, plugins) {
        if (!plugins?.length)
            return ctx;
        for (const plugin of plugins) {
            const hook = plugin?.hooks?.[phase];
            if (typeof hook === 'function') {
                // Keep `this` bound to the hooks object, as a direct member call would.
                ctx = (await hook.call(plugin.hooks, ctx)) || ctx;
            }
        }
        return ctx;
    }

    /**
     * Build a short summary from the first three sentences of the pages' text.
     * A sentence is a run of characters ending in `.`, `!` or `?`.
     *
     * @param {object[]} pages Pages with a `text` property.
     * @returns {string} Up to three sentences joined by single spaces, or `''`
     *   when no terminated sentence exists.
     */
    generateSummary(pages) {
        const combined = pages.map(p => p.text).join(' ');
        const sentencePattern = /[^.!?]+[.!?]+/g;
        const sentences = [];
        let found;
        // Stop after three matches instead of collecting every sentence.
        while (sentences.length < 3 && (found = sentencePattern.exec(combined)) !== null) {
            sentences.push(found[0].trim());
        }
        return sentences.join(' ').trim();
    }

    /**
     * List `links-to` edges between crawled pages: one edge per distinct
     * (page, target) pair where the target is another crawled page.
     *
     * @param {object[]} pages Pages with `url` and an optional `links` array
     *   of `{ href }` objects.
     * @returns {{from: string, to: string, type: string}[]}
     */
    buildRelationships(pages) {
        const relationships = [];
        const urls = new Set(pages.map(p => p.url));
        for (const page of pages) {
            const targets = new Set();
            for (const link of page.links ?? []) {
                const { href } = link;
                if (!urls.has(href) || href === page.url || targets.has(href))
                    continue;
                targets.add(href);
                relationships.push({ from: page.url, to: href, type: 'links-to' });
            }
        }
        return relationships;
    }

    /**
     * Substring-based URL filter.
     *
     * @param {string} url
     * @param {string[]} [include] When non-empty, the URL must contain at least one entry.
     * @param {string[]} [exclude] The URL must contain none of these.
     * @returns {boolean}
     */
    matchesPatterns(url, include, exclude) {
        if (exclude?.some(p => url.includes(p)))
            return false;
        if (include?.length && !include.some(p => url.includes(p)))
            return false;
        return true;
    }

    /**
     * Resolve relative markdown links to absolute URLs.
     *
     * Links that already carry a URL scheme (`https:`, `mailto:`, ...) and
     * fragment-only links (`#anchor`) are left untouched. An optional link
     * title (`[text](href "Title")`) is preserved verbatim.
     *
     * @param {string} markdown
     * @param {string} baseUrl URL the relative links are resolved against.
     * @returns {string}
     */
    resolveLinks(markdown, baseUrl) {
        const hasScheme = /^[a-z][a-z0-9+.-]*:/i;
        const withTitle = /^(\S+)(\s+(?:"[^"]*"|'[^']*'))\s*$/;
        return markdown.replace(/\[([^\]]*)\]\(([^)]+)\)/g, (match, text, target) => {
            // Separate the destination from an optional quoted title.
            const titled = withTitle.exec(target);
            const href = titled ? titled[1] : target;
            const title = titled ? titled[2] : '';
            // A scheme test rather than startsWith('http'), so relative paths
            // such as "httpd/conf.html" are still resolved.
            if (hasScheme.test(href) || href.startsWith('#'))
                return match;
            try {
                const resolved = new URL(href, baseUrl).href;
                return `[${text}](${resolved}${title})`;
            }
            catch {
                return match;
            }
        });
    }

    /**
     * Handle PDF extraction: extract the document at `options.url`, chunk it
     * and return it as a single-page crawl result.
     *
     * @param {object} options Crawl options; only `url` is read.
     * @param {number} startTime `Date.now()` value taken when the crawl began.
     */
    async handlePdf(options, startTime) {
        const extracted = await this.pdfExtractor.extract(options.url);
        const chunks = this.chunker.chunk(extracted.markdown, extracted.url, extracted.title, extracted.headings, this.config.chunking);
        const totalTokens = chunks.reduce((s, c) => s + c.tokens, 0);
        return {
            pages: [extracted],
            context: {
                id: (0, crypto_1.createHash)('sha256').update(options.url).digest('hex').slice(0, 16),
                source: options.url,
                chunks,
                summary: extracted.description,
                totalTokens,
                metadata: {
                    crawledAt: new Date().toISOString(),
                    pageCount: 1,
                    contentType: 'documentation',
                    relationships: [],
                },
                format: 'markdown',
            },
            stats: {
                pagesProcessed: 1, totalTokens, duration: Date.now() - startTime,
                errors: [], cached: 0, cacheHits: 0, cacheMisses: 0, retries: 0,
            },
        };
    }

    /**
     * Handle GitHub repository extraction: always the README, plus the docs
     * folder when `options.depth` is greater than 0 (capped so the total does
     * not exceed `options.maxPages`, default 50).
     *
     * @param {object} options Crawl options; reads `url`, `depth`, `maxPages`.
     * @param {number} startTime `Date.now()` value taken when the crawl began.
     */
    async handleGitHub(options, startTime) {
        const pages = [];
        // Always get README
        const readme = await this.githubExtractor.extractReadme(options.url);
        pages.push(readme);
        // If depth > 0, also get docs folder
        if ((options.depth ?? 0) > 0) {
            const docs = await this.githubExtractor.extractDocs(options.url);
            // Clamp at 0: a negative slice end would drop only the last doc.
            const room = Math.max(0, (options.maxPages ?? 50) - 1);
            pages.push(...docs.slice(0, room));
        }
        const allChunks = [];
        for (const page of pages) {
            const chunks = this.chunker.chunk(page.markdown, page.url, page.title, page.headings, this.config.chunking);
            allChunks.push(...chunks);
        }
        const totalTokens = allChunks.reduce((s, c) => s + c.tokens, 0);
        return {
            pages,
            context: {
                id: (0, crypto_1.createHash)('sha256').update(options.url).digest('hex').slice(0, 16),
                source: options.url,
                chunks: allChunks,
                summary: pages[0]?.description,
                totalTokens,
                metadata: {
                    crawledAt: new Date().toISOString(),
                    pageCount: pages.length,
                    contentType: 'readme',
                    relationships: this.buildRelationships(pages),
                },
                format: 'markdown',
            },
            stats: {
                pagesProcessed: pages.length, totalTokens, duration: Date.now() - startTime,
                errors: [], cached: 0, cacheHits: 0, cacheMisses: 0, retries: 0,
            },
        };
    }

    /** Free resources (tiktoken encoder) and clear the deduplicator. */
    dispose() {
        this.chunker.dispose();
        this.dedup.clear();
    }
}
475
+ exports.CrawlPipeline = CrawlPipeline;
476
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAKA,gDAAoD;AACpD,mDAAyD;AACzD,2CAAiD;AACjD,iDAAuD;AACvD,uDAA+D;AAC/D,iDAAqD;AACrD,0CAA4C;AAC5C,8CAAiD;AACjD,0CAA8C;AAC9C,sDAA6B;AAC7B,mCAAoC;AACpC,iDAAmC;AACnC,2BAAwE;AACxE,+BAA4B;AAE5B;;;GAGG;AACH,MAAa,aAAa;IAChB,OAAO,CAAiB;IACxB,SAAS,CAAmB;IAC5B,YAAY,CAAe;IAC3B,eAAe,CAAkB;IACjC,WAAW,CAAsB;IACjC,OAAO,CAAiB;IACxB,KAAK,CAAa;IAClB,aAAa,CAAgB;IAC7B,KAAK,CAAe;IACpB,MAAM,CAAmB;IAEjC,YAAY,SAA2B,EAAE;QACvC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,IAAI,wBAAc,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;QACpE,IAAI,CAAC,SAAS,GAAG,IAAI,0BAAgB,EAAE,CAAC;QACxC,IAAI,CAAC,YAAY,GAAG,IAAI,kBAAY,EAAE,CAAC;QACvC,IAAI,CAAC,eAAe,GAAG,IAAI,wBAAe,EAAE,CAAC;QAC7C,IAAI,CAAC,WAAW,GAAG,IAAI,8BAAmB,CAAC;YACzC,cAAc,EAAE,MAAM,CAAC,UAAU,EAAE,cAAc;SAClD,CAAC,CAAC;QACH,IAAI,CAAC,OAAO,GAAG,IAAI,wBAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,CAAC,KAAK,GAAG,IAAI,kBAAU,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC,KAAK,EAAE,OAAO,IAAI,IAAI;YACtC,GAAG,EAAE,MAAM,CAAC,KAAK,EAAE,GAAG,IAAI,IAAI;YAC9B,OAAO,EAAE,MAAM,CAAC,KAAK,EAAE,OAAO,IAAI,GAAG;YACrC,SAAS,EAAE,MAAM,CAAC,KAAK,EAAE,SAAS;YAClC,cAAc,EAAE,MAAM,CAAC,KAAK,EAAE,cAAc,IAAI,IAAI;SACrD,CAAC,CAAC;QACH,IAAI,CAAC,aAAa,GAAG,IAAI,uBAAa,EAAE,CAAC;QACzC,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAY,EAAE,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,OAAqB;QAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAuB,EAAE,CAAC;QACrC,MAAM,MAAM,GAAiB,EAAE,CAAC;QAChC,MAAM,KAAK,GAAkB,EAAE,CAAC;QAChC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;QAClC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC;QACjC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,8BAA8B;QAC9B,IAAI,UAAU,GAA2B,IAAI,CAAC;QAC9C,IAAI,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;YAChD,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;YACxD,IAAI,UAAU,EAAE,CAAC;gBACf,UAAU,CAAC,WAAW,C
AAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBACpD,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;gBAChC,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;YACpC,CAAC;QACH,CAAC;QAED,gDAAgD;QAChD,IAAI,KAAK,GAAa,EAAE,CAAC;QACzB,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACvB,KAAK,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACtD,CAAC;aAAM,IAAI,UAAU,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC;YAC3C,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC;QACjC,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;gBACpF,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;oBAC3E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBACvB,KAAK,GAAG,OAAO;6BACZ,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;6BACf,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,EAAE,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;6BACtF,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;oBACxB,CAAC;gBACH,CAAC;YACH,CAAC;YACD,IAAI,CAAC,KAAK,CAAC,MAAM;gBAAE,KAAK,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC3C,CAAC;QAED,yCAAyC;QACzC,IAAI,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC5C,CAAC;QACD,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YAClD,OAAO,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC/C,CAAC;QAED,kEAAkE;QAClE,IAAI,OAAO,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;YAChC,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QAC9B,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,IAAI,CAAC,CAAC;QACjD,MAAM,MAAM,GAAG,IAAI,iBAAM,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC;QAE3C,MAAM,UAAU,GAAG,KAAK,EAAE,GAAW,EAAE,YAAoB,EAAE,EAAE;YAC7D,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ;gBAAE,OAAO;YACzD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEjB,oBAAoB;YACpB,OAAO,CAAC,UAAU,EAAE,CAAC;gBACnB,cAAc,EAAE,KAAK,CAAC,MAAM;gBAC5B,eAAe,EAAE,OAAO,CAAC,IAAI,GAAG,KAAK,CAAC,M
AAM;gBAC5C,UAAU,EAAE,GAAG;gBACf,MAAM,EAAE,UAAU;aACnB,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,wBAAwB;gBACxB,IAAI,GAAG,GAAQ,EAAE,GAAG,EAAE,CAAC;gBACvB,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;gBAE/D,oBAAoB;gBACpB,IAAI,OAAO,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;oBAC5B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACnC,IAAI,MAAM,EAAE,CAAC;wBACX,SAAS,EAAE,CAAC;wBACZ,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;wBAEnB,+CAA+C;wBAC/C,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,cAAc,EAAE,CAAC;4BACtC,oCAAoC;wBACtC,CAAC;wBAED,2CAA2C;wBAC3C,IAAI,YAAY,GAAG,KAAK,EAAE,CAAC;4BACzB,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK;iCACzB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;iCACzB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,CAAC,eAAe,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;iCAC3F,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;iCACjC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;4BACpB,KAAK,MAAM,MAAM,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;gCAC/D,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,MAAM,EAAE,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC;4BACzD,CAAC;wBACH,CAAC;wBACD,OAAO;oBACT,CAAC;oBACD,WAAW,EAAE,CAAC;gBAChB,CAAC;gBAED,aAAa;gBACb,IAAI,IAAY,CAAC;gBACjB,IAAI,MAAc,CAAC;gBACnB,IAAI,OAAO,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;oBAChC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,EAAE;wBAC/C,aAAa,EAAE,OAAO,CAAC,gBAAgB;wBACvC,OAAO,EAAE,OAAO,CAAC,OAAO;wBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;wBACxB,WAAW,EAAE,OAAO,CAAC,KAAK;qBAC3B,CAAC,CAAC;oBACH,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC;oBACtB,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;gBACzB,CAAC;qBAAM,CAAC;oBACN,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,GAAG,EAAE;wBACjD,aAAa,EAAE,OAAO,CAAC,gBAAgB;wBACvC,OAAO,EAAE,OAAO,CAAC,OAAO;wBACxB,WAAW,EAAE,OAAO,CAAC,KAAK;qBAC3B,CAAC,CAAC;oBACH,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;oBACrC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;gBACzB,CAAC;gBAED,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK
,EAAE,QAAQ,MAAM,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;oBAClE,OAAO;gBACT,CAAC;gBAED,yBAAyB;gBACzB,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,YAAY,EAAE,EAAE,GAAG,GAAG,EAAE,IAAI,EAAE,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;gBAC7E,MAAM,SAAS,GAAG,GAAG,CAAC,IAAI,IAAI,IAAI,CAAC;gBAEnC,0BAA0B;gBAC1B,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,aAAa,EAAE,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;gBAEjE,kBAAkB;gBAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;gBAE5E,2BAA2B;gBAC3B,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,EAAE,GAAG,GAAG,EAAE,SAAS,EAAE,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;gBACpF,MAAM,cAAc,GAAqB,GAAG,CAAC,SAAS,IAAI,SAAS,CAAC;gBAEpE,4BAA4B;gBAC5B,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,eAAe,EAAE,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;gBAEnE,wBAAwB;gBACxB,cAAc,CAAC,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;gBAEhF,6BAA6B;gBAC7B,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,gBAAgB,EAAE,EAAE,GAAG,GAAG,EAAE,QAAQ,EAAE,cAAc,CAAC,QAAQ,EAAE,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;gBAC9G,IAAI,GAAG,CAAC,QAAQ;oBAAE,cAAc,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;gBAEzD,0CAA0C;gBAC1C,cAAc,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;gBAE1E,sBAAsB;gBACtB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,cAAc,CAAC,IAAI,CAAC,CAAC;gBAC/D,IAAI,KAAK,EAAE,CAAC;oBACV,OAAO,CAAC,yBAAyB;gBACnC,CAAC;gBAED,qBAAqB;gBACrB,IAAI,OAAO,CAAC,KAAK,KAAK,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,cAAc,EAAE,CAAC;oBACjE,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,EAAE,cAAc,CAAC,QAAQ,CAAC,CAAC;oBACjE,IAAI,IAAI,CAAC,OAAO;wBAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrC,CAAC;gBAED,eAAe;gBACf,IAAI,OAAO,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;oBAC5B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;gBACtC,CAAC;gBAED,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAE3B,2CAA2C;gBAC3C,IAAI,YAAY,GAAG,KAAK,EAAE,CAAC;oBACzB,MAAM,OAAO,GAAG,cAAc,CAAC,KAAK;yBACjC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC;yBACzB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,OAAO,
CAAC,eAAe,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;yBAC3F,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;yBACjC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBACpB,KAAK,MAAM,MAAM,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;wBAC/D,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,MAAM,EAAE,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC;oBACzD,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBAClB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;gBACvD,CAAC;gBAED,kBAAkB;gBAClB,IAAI,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;oBAChD,IAAI,CAAC,cAAc,CAAC;wBAClB,WAAW,EAAE,CAAC,GAAG,OAAO,CAAC;wBACzB,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;wBAC/C,KAAK;wBACL,MAAM;wBACN,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;qBACpC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;gBAC5B,CAAC;YACH,CAAC;YAAC,OAAO,GAAQ,EAAE,CAAC;gBAClB,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC,CAAC;QAEF,wBAAwB;QACxB,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;YACxB,IAAI,KAAK,CAAC,MAAM,IAAI,QAAQ;gBAAE,MAAM;YACpC,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;QAED,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC;QAEtB,8BAA8B;QAC9B,MAAM,SAAS,GAAmB,EAAE,CAAC;QACrC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,wBAAwB;YACxB,IAAI,GAAG,GAAQ,EAAE,IAAI,EAAE,CAAC;YACxB,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;YAE/D,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAC/B,IAAI,CAAC,QAAQ,EACb,IAAI,CAAC,GAAG,EACR,IAAI,CAAC,KAAK,EACV,IAAI,CAAC,QAAQ,EACb,IAAI,CAAC,MAAM,CAAC,QAAQ,CACrB,CAAC;YAEF,yBAAyB;YACzB,GAAG,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,YAAY,EAAE,EAAE,GAAG,GAAG,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;YAC/E,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;QACrD,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC
,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACpE,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QAE5C,MAAM,OAAO,GAAkB;YAC7B,EAAE,EAAE,IAAA,mBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;YACpF,MAAM,EAAE,OAAO,CAAC,GAAG;YACnB,MAAM,EAAE,SAAS;YACjB,OAAO;YACP,WAAW;YACX,QAAQ,EAAE;gBACR,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,SAAS,EAAE,KAAK,CAAC,MAAM;gBACvB,WAAW,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,IAAI,IAAI,SAAS;gBACjD,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,SAAS;gBACvC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,OAAO;gBACnC,aAAa;aACd;YACD,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,IAAI,UAAU;SACjD,CAAC;QAEF,MAAM,KAAK,GAAe;YACxB,cAAc,EAAE,KAAK,CAAC,MAAM;YAC5B,WAAW;YACX,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAChC,MAAM;YACN,MAAM,EAAE,SAAS;YACjB,SAAS;YACT,WAAW;YACX,OAAO;SACR,CAAC;QAEF,oBAAoB;QACpB,OAAO,CAAC,UAAU,EAAE,CAAC;YACnB,cAAc,EAAE,KAAK,CAAC,MAAM;YAC5B,eAAe,EAAE,OAAO,CAAC,IAAI;YAC7B,UAAU,EAAE,EAAE;YACd,MAAM,EAAE,UAAU;SACnB,CAAC,CAAC;QAEH,2EAA2E;QAC3E,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAE3B,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;IAC5E,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,UAAkB;QAC3C,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;YAC1D,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/C,OAAO,CAAC,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,cAAc,CAAC,KAAsB,EAAE,GAAW;QACxD,IAAI,CAAC,IAAA,eAAU,EAAC,GAAG,CAAC;YAAE,IAAA,cAAS,EAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,IAAA,kBAAa,EAAC,IAAA,WAAI,EAAC,GAAG,EAAE,iBAAiB,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IACrE,CAAC;IAEO,
cAAc,CAAC,GAAW;QAChC,MAAM,IAAI,GAAG,IAAA,WAAI,EAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;QAC1C,IAAI,CAAC,IAAA,eAAU,EAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAA,iBAAY,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;QACjD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,KAAa,EAAE,GAAQ,EAAE,OAA4B;QAC5E,IAAI,CAAC,OAAO,EAAE,MAAM;YAAE,OAAO,GAAG,CAAC;QACjC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxB,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC;YAChD,CAAC;QACH,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAEO,eAAe,CAAC,KAAyB;QAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;QACzD,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAChD,CAAC;IAEO,kBAAkB,CAAC,KAAyB;QAClD,MAAM,aAAa,GAAuB,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC9B,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC;oBAClD,aAAa,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;gBAC1E,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,aAAa,CAAC;IACvB,CAAC;IAEO,eAAe,CAAC,GAAW,EAAE,OAAkB,EAAE,OAAkB;QACzE,IAAI,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QACtD,IAAI,OAAO,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QACzE,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uDAAuD;IAC/C,YAAY,CAAC,QAAgB,EAAE,OAAe;QACpD,OAAO,QAAQ,CAAC,OAAO,CAAC,0BAA0B,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;YACxE,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,I
AAI,CAAC,UAAU,CAAC,SAAS,CAAC;gBAAE,OAAO,KAAK,CAAC;YAChG,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;gBAC7C,OAAO,IAAI,IAAI,KAAK,QAAQ,GAAG,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED,4BAA4B;IACpB,KAAK,CAAC,SAAS,CAAC,OAAqB,EAAE,SAAiB;QAC9D,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAC/B,SAAS,CAAC,QAAQ,EAAE,SAAS,CAAC,GAAG,EAAE,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAC7F,CAAC;QACF,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC7D,OAAO;YACL,KAAK,EAAE,CAAC,SAAS,CAAC;YAClB,OAAO,EAAE;gBACP,EAAE,EAAE,IAAA,mBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;gBACvE,MAAM,EAAE,OAAO,CAAC,GAAG;gBACnB,MAAM;gBACN,OAAO,EAAE,SAAS,CAAC,WAAW;gBAC9B,WAAW;gBACX,QAAQ,EAAE;oBACR,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;oBACnC,SAAS,EAAE,CAAC;oBACZ,WAAW,EAAE,eAAe;oBAC5B,aAAa,EAAE,EAAE;iBAClB;gBACD,MAAM,EAAE,UAAU;aACnB;YACD,KAAK,EAAE;gBACL,cAAc,EAAE,CAAC,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAChE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC;aAChE;SACF,CAAC;IACJ,CAAC;IAED,0CAA0C;IAClC,KAAK,CAAC,YAAY,CAAC,OAAqB,EAAE,SAAiB;QACjE,MAAM,KAAK,GAAuB,EAAE,CAAC;QAErC,oBAAoB;QACpB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACrE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEnB,qCAAqC;QACrC,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACjE,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,SAAS,GAAmB,EAAE,CAAC;QACrC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAC/B,IAAI,CAAC,Q
AAQ,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CACzE,CAAC;YACF,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC5B,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAChE,OAAO;YACL,KAAK;YACL,OAAO,EAAE;gBACP,EAAE,EAAE,IAAA,mBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;gBACvE,MAAM,EAAE,OAAO,CAAC,GAAG;gBACnB,MAAM,EAAE,SAAS;gBACjB,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW;gBAC9B,WAAW;gBACX,QAAQ,EAAE;oBACR,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;oBACnC,SAAS,EAAE,KAAK,CAAC,MAAM;oBACvB,WAAW,EAAE,QAAQ;oBACrB,aAAa,EAAE,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC;iBAC9C;gBACD,MAAM,EAAE,UAAU;aACnB;YACD,KAAK,EAAE;gBACL,cAAc,EAAE,KAAK,CAAC,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAC3E,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC;aAChE;SACF,CAAC;IACJ,CAAC;IAED,wCAAwC;IACxC,OAAO;QACL,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QACvB,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;CACF;AA1dD,sCA0dC"}
@@ -0,0 +1,48 @@
1
+ /// <reference types="node" />
2
+ import { EventEmitter } from 'events';
3
+ import { ExtractedContent, ContentChunk, CrawlProgress, CrawlResult } from './types';
4
+ /**
5
+ * Streaming interface for crawl results, built on Node's EventEmitter.
6
+ * Emits 'page', 'chunks', 'progress', 'error' and 'done' events as pages are processed, enabling real-time consumption.
7
+ */
8
+ export declare class CrawlStream extends EventEmitter {
9
+ private _pages;
10
+ private _chunks;
11
+ private _errors;
12
+ private _done;
13
+ /** Register a handler for the 'page' event, emitted when a page is extracted. Returns this for chaining. */
14
+ onPage(handler: (page: ExtractedContent) => void): this;
15
+ /** Register a handler for the 'chunks' event, emitted when chunks are generated from a page. Returns this for chaining. */
16
+ onChunks(handler: (chunks: ContentChunk[]) => void): this;
17
+ /** Register a handler for the 'progress' event, emitted on progress updates. Returns this for chaining. */
18
+ onProgress(handler: (progress: CrawlProgress) => void): this;
19
+ /** Register a handler for the 'error' event, emitted on non-fatal errors (failing url plus message). Returns this for chaining. */
20
+ onError(handler: (error: {
21
+ url: string;
22
+ error: string;
23
+ }) => void): this;
24
+ /** Register a handler for the 'done' event, emitted when the crawl is complete. Returns this for chaining. */
25
+ onDone(handler: (result: CrawlResult) => void): this;
26
+ /** @internal Records the page in the collected pages and emits a 'page' event. */
27
+ emitPage(page: ExtractedContent): void;
28
+ /** @internal Appends the chunks to the collected chunks and emits a 'chunks' event. */
29
+ emitChunks(chunks: ContentChunk[]): void;
30
+ /** @internal Emits a 'progress' event; progress updates are not stored. */
31
+ emitProgress(progress: CrawlProgress): void;
32
+ /** @internal Records the error and reports it through the 'error' event. */
33
+ emitError(error: {
34
+ url: string;
35
+ error: string;
36
+ }): void;
37
+ /** @internal Marks the crawl as complete and emits a 'done' event carrying the result. */
38
+ emitDone(result: CrawlResult): void;
39
+ /** Get all pages collected so far (the internal array itself, not a copy) */
40
+ get pages(): ExtractedContent[];
41
+ /** Get all chunks collected so far (the internal array itself, not a copy) */
42
+ get chunks(): ContentChunk[];
43
+ /** Check if crawl is complete, i.e. emitDone() has been called */
44
+ get done(): boolean;
45
+ /** Returns a promise that resolves once the crawl has completed */
46
+ toPromise(): Promise<CrawlResult>;
47
+ }
48
+ //# sourceMappingURL=stream.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stream.d.ts","sourceRoot":"","sources":["../../src/core/stream.ts"],"names":[],"mappings":";AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAErF;;;GAGG;AACH,qBAAa,WAAY,SAAQ,YAAY;IAC3C,OAAO,CAAC,MAAM,CAA0B;IACxC,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,OAAO,CAA6C;IAC5D,OAAO,CAAC,KAAK,CAAS;IAEtB,uCAAuC;IACvC,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAIvD,oDAAoD;IACpD,QAAQ,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,YAAY,EAAE,KAAK,IAAI,GAAG,IAAI;IAIzD,kCAAkC;IAClC,UAAU,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,aAAa,KAAK,IAAI,GAAG,IAAI;IAI5D,oCAAoC;IACpC,OAAO,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,GAAG,IAAI;IAIvE,qCAAqC;IACrC,MAAM,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,IAAI,GAAG,IAAI;IAIpD,gBAAgB;IAChB,QAAQ,CAAC,IAAI,EAAE,gBAAgB,GAAG,IAAI;IAKtC,gBAAgB;IAChB,UAAU,CAAC,MAAM,EAAE,YAAY,EAAE,GAAG,IAAI;IAKxC,gBAAgB;IAChB,YAAY,CAAC,QAAQ,EAAE,aAAa,GAAG,IAAI;IAI3C,gBAAgB;IAChB,SAAS,CAAC,KAAK,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAKtD,gBAAgB;IAChB,QAAQ,CAAC,MAAM,EAAE,WAAW,GAAG,IAAI;IAKnC,qCAAqC;IACrC,IAAI,KAAK,IAAI,gBAAgB,EAAE,CAAwB;IAEvD,sCAAsC;IACtC,IAAI,MAAM,IAAI,YAAY,EAAE,CAAyB;IAErD,iCAAiC;IACjC,IAAI,IAAI,IAAI,OAAO,CAAuB;IAE1C,kDAAkD;IAClD,SAAS,IAAI,OAAO,CAAC,WAAW,CAAC;CAIlC"}
@@ -0,0 +1,72 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CrawlStream = void 0;
4
+ const events_1 = require("events");
5
/**
 * Streaming interface for crawl results.
 * Emits events as pages are processed, enabling real-time consumption.
 *
 * Events: 'page', 'chunks', 'progress', 'error' and 'done'. Pages, chunks and
 * errors are also accumulated on the instance, and the final result is kept
 * so that late callers of toPromise() still receive it.
 */
class CrawlStream extends events_1.EventEmitter {
    _pages = [];
    _chunks = [];
    _errors = [];
    _done = false;
    /** Result handed to emitDone(); stays undefined until the crawl completes. */
    _result = undefined;
    /**
     * Register a handler for the 'page' event (a page was extracted).
     * @param handler Called with the extracted page.
     * @returns This stream, for chaining.
     */
    onPage(handler) {
        return this.on('page', handler);
    }
    /**
     * Register a handler for the 'chunks' event (chunks were generated from a page).
     * @param handler Called with the array of chunks for one page.
     * @returns This stream, for chaining.
     */
    onChunks(handler) {
        return this.on('chunks', handler);
    }
    /**
     * Register a handler for the 'progress' event.
     * @param handler Called with each progress update.
     * @returns This stream, for chaining.
     */
    onProgress(handler) {
        return this.on('progress', handler);
    }
    /**
     * Register a handler for the 'error' event (non-fatal errors).
     * @param handler Called with the `{ url, error }` record of each failure.
     * @returns This stream, for chaining.
     */
    onError(handler) {
        return this.on('error', handler);
    }
    /**
     * Register a handler for the 'done' event (crawl is complete).
     * @param handler Called with the final crawl result.
     * @returns This stream, for chaining.
     */
    onDone(handler) {
        return this.on('done', handler);
    }
    /** @internal Record the page and notify 'page' listeners. */
    emitPage(page) {
        this._pages.push(page);
        this.emit('page', page);
    }
    /** @internal Record the chunks and notify 'chunks' listeners. */
    emitChunks(chunks) {
        this._chunks.push(...chunks);
        this.emit('chunks', chunks);
    }
    /** @internal Notify 'progress' listeners; progress updates are not stored. */
    emitProgress(progress) {
        this.emit('progress', progress);
    }
    /** @internal Record a non-fatal error and notify 'error' listeners, if any. */
    emitError(error) {
        this._errors.push(error);
        // EventEmitter throws when 'error' is emitted without a listener. These
        // errors are non-fatal, so only emit when someone is listening; the
        // record remains available through the `errors` getter either way.
        if (this.listenerCount('error') > 0) {
            this.emit('error', error);
        }
    }
    /** @internal Mark the crawl as complete and notify 'done' listeners. */
    emitDone(result) {
        this._done = true;
        // Store the result before emitting so a 'done' handler that calls
        // toPromise() already sees it.
        this._result = result;
        this.emit('done', result);
    }
    /** Get all pages collected so far */
    get pages() { return this._pages; }
    /** Get all chunks collected so far */
    get chunks() { return this._chunks; }
    /** Get all non-fatal errors collected so far */
    get errors() { return this._errors; }
    /** Check if crawl is complete */
    get done() { return this._done; }
    /**
     * Wait for completion and return final result.
     * @returns A promise resolving with the value passed to emitDone(); it
     *   resolves immediately when the crawl has already completed.
     */
    toPromise() {
        if (this._done) {
            return Promise.resolve(this._result);
        }
        return new Promise((resolve) => this.once('done', resolve));
    }
}
71
+ exports.CrawlStream = CrawlStream;
72
+ //# sourceMappingURL=stream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stream.js","sourceRoot":"","sources":["../../src/core/stream.ts"],"names":[],"mappings":";;;AAAA,mCAAsC;AAGtC;;;GAGG;AACH,MAAa,WAAY,SAAQ,qBAAY;IACnC,MAAM,GAAuB,EAAE,CAAC;IAChC,OAAO,GAAmB,EAAE,CAAC;IAC7B,OAAO,GAA0C,EAAE,CAAC;IACpD,KAAK,GAAG,KAAK,CAAC;IAEtB,uCAAuC;IACvC,MAAM,CAAC,OAAyC;QAC9C,OAAO,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAClC,CAAC;IAED,oDAAoD;IACpD,QAAQ,CAAC,OAAyC;QAChD,OAAO,IAAI,CAAC,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,kCAAkC;IAClC,UAAU,CAAC,OAA0C;QACnD,OAAO,IAAI,CAAC,EAAE,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IACtC,CAAC;IAED,oCAAoC;IACpC,OAAO,CAAC,OAAwD;QAC9D,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IACnC,CAAC;IAED,qCAAqC;IACrC,MAAM,CAAC,OAAsC;QAC3C,OAAO,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAClC,CAAC;IAED,gBAAgB;IAChB,QAAQ,CAAC,IAAsB;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC1B,CAAC;IAED,gBAAgB;IAChB,UAAU,CAAC,MAAsB;QAC/B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC7B,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,gBAAgB;IAChB,YAAY,CAAC,QAAuB;QAClC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IAClC,CAAC;IAED,gBAAgB;IAChB,SAAS,CAAC,KAAqC;QAC7C,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IAC5B,CAAC;IAED,gBAAgB;IAChB,QAAQ,CAAC,MAAmB;QAC1B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED,qCAAqC;IACrC,IAAI,KAAK,KAAyB,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAEvD,sCAAsC;IACtC,IAAI,MAAM,KAAqB,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;IAErD,iCAAiC;IACjC,IAAI,IAAI,KAAc,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAE1C,kDAAkD;IAClD,SAAS;QACP,IAAI,IAAI,CAAC,KAAK;YAAE,OAAO,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAgB,CAAC,CAAC,CAAC,SAAgB,CAAC,CAAC;QAC7G,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC9D,CAAC;CACF;AA1ED,kCA0EC"}