crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,673 @@
1
+ /**
2
+ * StreamProcessor - Streaming data processing with memory monitoring and pagination
3
+ * Handles large datasets efficiently through streaming and chunked processing
4
+ */
5
+
6
+ import { Readable, Transform, Writable, pipeline } from 'stream';
7
+ import { promisify } from 'util';
8
+ import { EventEmitter } from 'events';
9
+ import { config } from '../../constants/config.js';
10
+
11
// Promise-returning form of stream.pipeline so processPipeline() can await it.
const pipelineAsync = promisify(pipeline);

/**
 * Processes collections item by item (sequentially, in bounded parallel
 * batches, or page by page) while sampling heap usage, and offers factory
 * helpers for Node.js Readable/Transform/Writable streams.
 *
 * Events emitted on the processor instance:
 *  - 'itemProcessed', 'itemError', 'pageProcessed', 'pageEvicted'
 *  - 'itemWritten', 'writeError' (from streams made by createWritableStream)
 *  - 'memoryPressure' when heapUsed exceeds the configured limit
 *  - 'shutdown' when shutdown() is called
 */
export class StreamProcessor extends EventEmitter {
  /**
   * @param {Object} [options]
   * @param {number} [options.chunkSize=1000] - Items per chunk in sequential mode
   * @param {number} [options.memoryLimit=104857600] - heapUsed threshold in bytes
   * @param {number} [options.memoryCheckInterval=5000] - Background sampling period (ms)
   * @param {number} [options.bufferHighWaterMark=16384] - Default highWaterMark for created streams
   * @param {boolean} [options.objectMode=false] - Default objectMode for created streams
   * @param {boolean} [options.enableBackpressure=true] - Stored on the instance; not read by this class
   * @param {number} [options.maxPagesInMemory=10] - Page cache capacity before eviction
   * @param {number} [options.pageSize=100] - Items per page in pagination mode
   */
  constructor(options = {}) {
    super();

    const {
      chunkSize = 1000,
      memoryLimit = 100 * 1024 * 1024, // 100MB
      memoryCheckInterval = 5000,
      bufferHighWaterMark = 16 * 1024, // 16KB
      objectMode = false,
      enableBackpressure = true,
      maxPagesInMemory = 10,
      pageSize = 100
    } = options;

    this.chunkSize = chunkSize;
    this.memoryLimit = memoryLimit;
    this.memoryCheckInterval = memoryCheckInterval;
    this.bufferHighWaterMark = bufferHighWaterMark;
    this.objectMode = objectMode;
    this.enableBackpressure = enableBackpressure;
    this.maxPagesInMemory = maxPagesInMemory;
    this.pageSize = pageSize;

    // Memory monitoring
    this.memoryUsage = {
      current: 0,
      peak: 0,
      limit: this.memoryLimit,
      checkInterval: null
    };

    // Processing state
    this.isProcessing = false;
    this.processedItems = 0;
    this.totalItems = 0;
    this.startTime = null;
    this.endTime = null;

    // Pagination state
    this.pages = new Map();
    this.currentPage = 0;
    this.pageAccess = new Map(); // Track page access times for LRU

    // Signal handler registered by setupGracefulShutdown(); kept so that
    // shutdown() can unregister it again.
    this.shutdownHandler = null;

    this.startMemoryMonitoring();
    this.setupGracefulShutdown();
  }

  /**
   * Materialize the input as an array.
   * Arrays are returned as-is; async iterables are drained with `for await`
   * (Array.from() would silently yield an empty array for them); any other
   * value is handed to Array.from().
   * @param {Array|Iterable|AsyncIterable} data - Input collection
   * @returns {Promise<Array>} - Items as an array
   */
  async toArray(data) {
    if (Array.isArray(data)) {
      return data;
    }

    if (data != null && typeof data[Symbol.asyncIterator] === 'function') {
      const collected = [];
      for await (const item of data) {
        collected.push(item);
      }
      return collected;
    }

    return Array.from(data);
  }

  /**
   * Process a dataset, dispatching to pagination, parallel or sequential mode.
   * Pagination takes precedence over parallel when both are requested.
   * @param {Array|Iterable|AsyncIterable} data - Data to process
   * @param {Function} processor - Called as processor(item, index); may be async
   * @param {Object} [options] - Processing options
   * @param {boolean} [options.parallel=false] - Use processParallel
   * @param {number} [options.maxConcurrency=5] - Batch size in parallel mode
   * @param {number} [options.yieldEvery=100] - Yield to the event loop every N items (sequential)
   * @param {boolean} [options.enablePagination=false] - Use processWithPagination
   * @param {boolean} [options.collectResults=true] - Collect processor return values
   * @returns {Promise<Object>} - Result of the selected mode
   */
  async processStream(data, processor, options = {}) {
    const {
      parallel = false,
      maxConcurrency = 5,
      yieldEvery = 100,
      enablePagination = false,
      collectResults = true
    } = options;

    this.isProcessing = true;
    this.startTime = Date.now();
    // Clear the previous run's end time so durations are never computed
    // against a stale timestamp.
    this.endTime = null;
    this.processedItems = 0;

    const results = collectResults ? [] : null;
    const errors = [];

    try {
      const items = await this.toArray(data);
      this.totalItems = items.length;

      if (enablePagination) {
        return await this.processWithPagination(items, processor, options);
      }
      if (parallel) {
        return await this.processParallel(items, processor, maxConcurrency, options);
      }
      return await this.processSequential(items, processor, yieldEvery, collectResults, results, errors);
    } finally {
      this.isProcessing = false;
      this.endTime = Date.now();
    }
  }

  /**
   * Process items one at a time, checking memory before and after each chunk.
   * A failing item is recorded in `errors` and processing continues.
   * `processedItems` counts successful items only; the index handed to the
   * processor and stored with errors is the item's position in the input.
   * @param {Array|Iterable|AsyncIterable} data - Data to process
   * @param {Function} processor - Called as processor(item, index)
   * @param {number} yieldEvery - Yield control every N successful items
   * @param {boolean} collectResults - Whether to collect results
   * @param {Array} results - Array that receives defined results
   * @param {Array} [errors] - Array that receives error records
   * @returns {Promise<Object>} - Processing results
   */
  async processSequential(data, processor, yieldEvery, collectResults, results, errors = []) {
    const items = await this.toArray(data);
    const chunks = this.createChunks(items, this.chunkSize);
    const collected = collectResults ? (results || []) : null;

    // Position in the input, advanced for every item whether it succeeds or
    // fails, so indexes stay correct after an error.
    let position = 0;

    for (const chunk of chunks) {
      await this.checkMemoryPressure();

      for (const item of chunk) {
        const itemIndex = position++;

        try {
          const result = await processor(item, itemIndex);

          if (collectResults && result !== undefined) {
            collected.push(result);
          }

          this.processedItems++;

          // Yield control periodically
          if (this.processedItems % yieldEvery === 0) {
            await this.yield();
          }

          // `index` here is the running count of successfully processed items.
          this.emit('itemProcessed', {
            index: this.processedItems,
            total: this.totalItems,
            result: collectResults ? result : undefined
          });

        } catch (error) {
          errors.push({
            index: itemIndex,
            item,
            error: error.message
          });

          this.emit('itemError', {
            index: itemIndex,
            error: error.message
          });
        }
      }

      // Check memory after each chunk
      await this.checkMemoryPressure();
    }

    return this.createProcessingResult(collected, errors);
  }

  /**
   * Process items in batches of `maxConcurrency`; every item of a batch is
   * started together and the batch is awaited before the next one begins.
   * `processedItems` advances by the batch size, including failed items.
   * @param {Array|Iterable|AsyncIterable} data - Data to process
   * @param {Function} processor - Called as processor(item, index)
   * @param {number} maxConcurrency - Batch size (values below 1 are treated as 1)
   * @param {Object} [options] - Processing options
   * @param {boolean} [options.collectResults=true] - Collect processor return values
   * @returns {Promise<Object>} - Processing results
   */
  async processParallel(data, processor, maxConcurrency, options = {}) {
    const { collectResults = true } = options;
    const items = await this.toArray(data);
    const chunks = this.createChunks(items, maxConcurrency);

    const results = collectResults ? [] : null;
    const errors = [];

    for (const chunk of chunks) {
      await this.checkMemoryPressure();

      // Index of the first item of this batch.
      const baseIndex = this.processedItems;

      const outcomes = await Promise.all(chunk.map(async (item, offset) => {
        const index = baseIndex + offset;
        try {
          const result = await processor(item, index);
          return { success: true, result, index };
        } catch (error) {
          return { success: false, error: error.message, item, index };
        }
      }));

      for (const outcome of outcomes) {
        if (outcome.success) {
          if (collectResults && outcome.result !== undefined) {
            results.push(outcome.result);
          }

          this.emit('itemProcessed', {
            index: outcome.index,
            total: this.totalItems,
            result: collectResults ? outcome.result : undefined
          });
        } else {
          errors.push({
            index: outcome.index,
            item: outcome.item,
            error: outcome.error
          });

          this.emit('itemError', {
            index: outcome.index,
            error: outcome.error
          });
        }
      }

      this.processedItems += chunk.length;
    }

    return this.createProcessingResult(results, errors);
  }

  /**
   * Process items page by page, storing each page's results in the page
   * cache. Because the cache is bounded by `maxPagesInMemory`, the returned
   * `pages` map holds only the pages still cached when processing ends.
   * The cache is cleared first so pages from an earlier run never leak into
   * this run's result.
   * @param {Array|Iterable|AsyncIterable} data - Data to process
   * @param {Function} processor - Called as processor(item, globalIndex)
   * @param {Object} [options] - Accepted for signature parity; not read here
   * @returns {Promise<Object>} - { totalItems, totalPages, pageSize, pages, errors }
   */
  async processWithPagination(data, processor, options) {
    const items = await this.toArray(data);

    // A page size of 0, a negative value or NaN would make the page loop
    // unbounded or empty, so fall back to at least one item per page.
    const pageSize = Math.max(1, Math.floor(this.pageSize) || 1);
    const totalPages = Math.ceil(items.length / pageSize);

    this.pages.clear();
    this.pageAccess.clear();

    const paginatedResult = {
      totalItems: items.length,
      totalPages,
      pageSize,
      pages: new Map(),
      errors: []
    };

    // Process each page
    for (let pageIndex = 0; pageIndex < totalPages; pageIndex++) {
      await this.checkMemoryPressure();

      const startIndex = pageIndex * pageSize;
      const endIndex = Math.min(startIndex + pageSize, items.length);
      const pageData = items.slice(startIndex, endIndex);

      const pageResults = [];
      const pageErrors = [];

      for (let i = 0; i < pageData.length; i++) {
        const globalIndex = startIndex + i;
        const item = pageData[i];

        try {
          const result = await processor(item, globalIndex);
          pageResults.push(result);

          this.emit('itemProcessed', {
            index: globalIndex,
            total: items.length,
            page: pageIndex,
            result
          });

        } catch (error) {
          pageErrors.push({
            index: globalIndex,
            item,
            error: error.message
          });

          this.emit('itemError', {
            index: globalIndex,
            page: pageIndex,
            error: error.message
          });
        }
      }

      // Store page results
      this.storePage(pageIndex, {
        data: pageResults,
        errors: pageErrors,
        startIndex,
        endIndex,
        processedAt: Date.now()
      });

      this.processedItems += pageData.length;
      paginatedResult.errors.push(...pageErrors);

      this.emit('pageProcessed', {
        pageIndex,
        totalPages,
        itemsInPage: pageData.length,
        errorsInPage: pageErrors.length
      });
    }

    paginatedResult.pages = this.getAllPages();
    return paginatedResult;
  }

  /**
   * Create a Transform stream that runs `transformer` on every chunk.
   *
   * Sequential mode (default) finishes one chunk before accepting the next.
   * In parallel mode up to `maxConcurrency` chunks run detached (output order
   * is then not guaranteed); once that many are in flight the next chunk is
   * processed inline, which applies backpressure. The stream's flush step
   * waits for all in-flight work before the stream ends.
   *
   * A null or undefined transformer result is not pushed, because pushing
   * null would signal end-of-stream. 'itemProcessed' and 'itemError' are
   * emitted on this processor; a failure also errors the stream.
   * @param {Function} transformer - Called as transformer(chunk, index); may be async
   * @param {Object} [options] - Stream options
   * @param {boolean} [options.objectMode] - Defaults to the processor's objectMode
   * @param {number} [options.highWaterMark] - Defaults to bufferHighWaterMark
   * @param {boolean} [options.parallel=false] - Allow concurrent transforms
   * @param {number} [options.maxConcurrency=5] - Max detached transforms in flight
   * @returns {Transform} - Transform stream
   */
  createTransformStream(transformer, options = {}) {
    const {
      objectMode = this.objectMode,
      highWaterMark = this.bufferHighWaterMark,
      parallel = false,
      maxConcurrency = 5
    } = options;

    // Inside the stream callbacks `this` is the Transform, so keep an explicit
    // reference to the processor for memory checks and event emission.
    const owner = this;
    const inFlight = new Set();
    let processedCount = 0;

    const runTransformer = async (stream, chunk, index) => {
      const result = await transformer(chunk, index);
      if (result !== undefined && result !== null) {
        stream.push(result);
      }
      owner.emit('itemProcessed', { index, result });
    };

    return new Transform({
      objectMode,
      highWaterMark,
      transform(chunk, encoding, callback) {
        const stream = this;
        const index = processedCount++;

        (async () => {
          await owner.checkMemoryPressure();

          if (parallel && inFlight.size < maxConcurrency) {
            const task = runTransformer(stream, chunk, index).then(
              () => {
                inFlight.delete(task);
              },
              (error) => {
                inFlight.delete(task);
                owner.emit('itemError', { index, error: error.message });
                stream.destroy(error);
              }
            );
            inFlight.add(task);
            return; // Detached: accept the next chunk right away.
          }

          await runTransformer(stream, chunk, index);
        })().then(
          () => callback(),
          (error) => {
            owner.emit('itemError', { index, error: error.message });
            callback(error);
          }
        );
      },
      flush(callback) {
        // Do not end the stream while detached transforms are still running.
        Promise.all(inFlight).then(() => callback(), callback);
      }
    });
  }

  /**
   * Create a Readable stream that emits the items of `data` in order.
   * Async iterables are wrapped with Readable.from(); other inputs are
   * materialized with Array.from() and pushed one item per read.
   * @param {Array|Iterable|AsyncIterable} data - Data to stream
   * @param {Object} [options] - Stream options
   * @param {boolean} [options.objectMode] - Defaults to the processor's objectMode
   * @param {number} [options.highWaterMark] - Defaults to bufferHighWaterMark
   * @returns {Readable} - Readable stream
   */
  createReadableStream(data, options = {}) {
    const {
      objectMode = this.objectMode,
      highWaterMark = this.bufferHighWaterMark
    } = options;

    if (!Array.isArray(data) && data != null && typeof data[Symbol.asyncIterator] === 'function') {
      return Readable.from(data, { objectMode, highWaterMark });
    }

    const items = Array.isArray(data) ? data : Array.from(data);
    let cursor = 0;

    return new Readable({
      objectMode,
      highWaterMark,
      read() {
        if (cursor < items.length) {
          this.push(items[cursor++]);
        } else {
          this.push(null); // End of stream
        }
      }
    });
  }

  /**
   * Create a Writable stream that hands every chunk to `writer`.
   * 'itemWritten' and 'writeError' are emitted on this processor; their
   * `index` is the number of writer calls started so far.
   * @param {Function} writer - Called as writer(chunk, index); may be async
   * @param {Object} [options] - Stream options
   * @param {boolean} [options.objectMode] - Defaults to the processor's objectMode
   * @param {number} [options.highWaterMark] - Defaults to bufferHighWaterMark
   * @returns {Writable} - Writable stream
   */
  createWritableStream(writer, options = {}) {
    const {
      objectMode = this.objectMode,
      highWaterMark = this.bufferHighWaterMark
    } = options;

    const owner = this;
    let writeCount = 0;

    return new Writable({
      objectMode,
      highWaterMark,
      write(chunk, encoding, callback) {
        (async () => {
          await owner.checkMemoryPressure();
          await writer(chunk, writeCount++);
          owner.emit('itemWritten', { index: writeCount, chunk });
        })().then(
          () => callback(),
          (error) => {
            owner.emit('writeError', { index: writeCount, error: error.message });
            callback(error);
          }
        );
      }
    });
  }

  /**
   * Run readable -> transforms -> writable through stream.pipeline and
   * resolve when the pipeline completes; rejects if any stream errors.
   * @param {Readable} readable - Readable stream
   * @param {Array<Transform>} [transforms] - Transform streams, in order
   * @param {Writable} writable - Writable stream
   * @returns {Promise<void>}
   */
  async processPipeline(readable, transforms = [], writable) {
    const streams = [readable, ...(transforms || []), writable];

    this.isProcessing = true;
    this.startTime = Date.now();
    this.endTime = null;

    try {
      await pipelineAsync(...streams);
    } finally {
      this.isProcessing = false;
      this.endTime = Date.now();
    }
  }

  /**
   * Store a page and evict the least recently used page when the cache
   * grows beyond `maxPagesInMemory`.
   * @param {number} pageIndex - Page index
   * @param {Object} pageData - Page data
   */
  storePage(pageIndex, pageData) {
    this.touchPage(pageIndex);

    // Store page
    this.pages.set(pageIndex, pageData);

    // Evict pages if over limit
    if (this.pages.size > this.maxPagesInMemory) {
      this.evictOldestPage();
    }
  }

  /**
   * Record an access to a page. The entry is re-inserted so that Map
   * iteration order mirrors recency, which breaks ties between pages touched
   * within the same millisecond.
   * @param {number} pageIndex - Page index
   */
  touchPage(pageIndex) {
    this.pageAccess.delete(pageIndex);
    this.pageAccess.set(pageIndex, Date.now());
  }

  /**
   * Get page data and mark the page as recently used.
   * @param {number} pageIndex - Page index
   * @returns {Object|null} - Page data, or null when the page is not cached
   */
  getPage(pageIndex) {
    if (this.pages.has(pageIndex)) {
      this.touchPage(pageIndex);
      return this.pages.get(pageIndex);
    }
    return null;
  }

  /**
   * Get all stored pages
   * @returns {Map} - Shallow copy of the page cache
   */
  getAllPages() {
    return new Map(this.pages);
  }

  /**
   * Evict the least recently accessed page and emit 'pageEvicted'.
   * Does nothing when no page is tracked.
   */
  evictOldestPage() {
    let oldestPage = null;
    // Start from Infinity rather than Date.now(): pages touched during the
    // current millisecond must still be eligible for eviction.
    let oldestTime = Infinity;

    for (const [pageIndex, accessTime] of this.pageAccess.entries()) {
      if (accessTime < oldestTime) {
        oldestTime = accessTime;
        oldestPage = pageIndex;
      }
    }

    if (oldestPage !== null) {
      this.pages.delete(oldestPage);
      this.pageAccess.delete(oldestPage);

      this.emit('pageEvicted', { pageIndex: oldestPage });
    }
  }

  /**
   * Split data into consecutive chunks.
   * A chunk size that is zero, negative or not a number is treated as 1 so
   * the loop always advances and no item is dropped.
   * @param {Array|Iterable} data - Data to chunk
   * @param {number} chunkSize - Size of each chunk
   * @returns {Array} - Array of chunks
   */
  createChunks(data, chunkSize) {
    const items = Array.isArray(data) ? data : Array.from(data);
    const size = Math.max(1, Math.floor(chunkSize) || 1);
    const chunks = [];

    for (let i = 0; i < items.length; i += size) {
      chunks.push(items.slice(i, i + size));
    }

    return chunks;
  }

  /**
   * Sample heap usage and react when it exceeds `memoryLimit`: emit
   * 'memoryPressure', run global.gc() when it is exposed, evict a quarter of
   * the cached pages if the cache is more than half full, then yield to the
   * event loop.
   * @returns {Promise<void>}
   */
  async checkMemoryPressure() {
    const memUsage = process.memoryUsage();
    this.memoryUsage.current = memUsage.heapUsed;
    this.memoryUsage.peak = Math.max(this.memoryUsage.peak, memUsage.heapUsed);

    if (memUsage.heapUsed > this.memoryLimit) {
      this.emit('memoryPressure', {
        current: memUsage.heapUsed,
        limit: this.memoryLimit,
        percentage: (memUsage.heapUsed / this.memoryLimit) * 100
      });

      // Force garbage collection if available
      if (global.gc) {
        global.gc();
      }

      // Evict some pages if using pagination
      if (this.pages.size > this.maxPagesInMemory / 2) {
        const pagesToEvict = Math.ceil(this.pages.size / 4);
        for (let i = 0; i < pagesToEvict; i++) {
          this.evictOldestPage();
        }
      }

      // Yield control to allow other operations
      await this.yield();
    }
  }

  /**
   * Start periodic memory sampling. No-op when already running.
   * The timer is unref'd so an idle processor never keeps the process alive.
   */
  startMemoryMonitoring() {
    if (this.memoryUsage.checkInterval) {
      return; // Already monitoring
    }

    const timer = setInterval(() => {
      this.checkMemoryPressure();
    }, this.memoryCheckInterval);

    if (timer && typeof timer.unref === 'function') {
      timer.unref();
    }

    this.memoryUsage.checkInterval = timer;
  }

  /**
   * Stop periodic memory sampling.
   */
  stopMemoryMonitoring() {
    if (this.memoryUsage.checkInterval) {
      clearInterval(this.memoryUsage.checkInterval);
      this.memoryUsage.checkInterval = null;
    }
  }

  /**
   * Yield control to the event loop (resolves on the next setImmediate tick).
   * @returns {Promise<void>}
   */
  async yield() {
    return new Promise(resolve => setImmediate(resolve));
  }

  /**
   * Build the summary object returned by the sequential and parallel modes.
   * This runs while processing is still in flight (endTime is only set once
   * processStream returns), so the elapsed time is measured against "now"
   * in that case. itemsPerSecond is reported as 0 when no time has elapsed.
   * @param {Array|null} results - Processing results
   * @param {Array} errors - Processing errors
   * @returns {Object} - Result object
   */
  createProcessingResult(results, errors) {
    const finishedAt = (this.isProcessing || this.endTime === null) ? Date.now() : this.endTime;
    const duration = this.startTime === null ? 0 : Math.max(0, finishedAt - this.startTime);
    const itemsPerSecond = duration > 0 ? this.processedItems / (duration / 1000) : 0;

    return {
      processedItems: this.processedItems,
      totalItems: this.totalItems,
      results: results || [],
      errors,
      duration,
      itemsPerSecond: Math.round(itemsPerSecond * 100) / 100,
      memoryUsage: {
        peak: this.memoryUsage.peak,
        current: this.memoryUsage.current,
        limit: this.memoryUsage.limit
      },
      success: errors.length === 0
    };
  }

  /**
   * Get processing statistics. itemsPerSecond is 0 when no time has elapsed.
   * @returns {Object} - Statistics
   */
  getStats() {
    const finishedAt = (this.isProcessing || this.endTime === null) ? Date.now() : this.endTime;
    const duration = this.startTime === null ? 0 : Math.max(0, finishedAt - this.startTime);

    return {
      isProcessing: this.isProcessing,
      processedItems: this.processedItems,
      totalItems: this.totalItems,
      duration,
      itemsPerSecond: duration > 0 ? this.processedItems / (duration / 1000) : 0,
      memoryUsage: this.memoryUsage,
      pagesInMemory: this.pages.size,
      maxPagesInMemory: this.maxPagesInMemory
    };
  }

  /**
   * Reset counters, timestamps, the page cache and memory tracking.
   * The memory monitor and signal handlers are left running.
   */
  reset() {
    this.processedItems = 0;
    this.totalItems = 0;
    this.startTime = null;
    this.endTime = null;
    this.pages.clear();
    this.pageAccess.clear();
    this.currentPage = 0;

    // Reset memory tracking
    this.memoryUsage.current = 0;
    this.memoryUsage.peak = 0;
  }

  /**
   * Graceful shutdown: emit 'shutdown', stop the memory monitor, unregister
   * this instance's signal handlers and drop cached pages.
   * @returns {Promise<void>}
   */
  async shutdown() {
    this.emit('shutdown');

    this.stopMemoryMonitoring();

    // Remove the listeners added by setupGracefulShutdown(); otherwise every
    // instance ever created would stay referenced by `process`.
    if (this.shutdownHandler) {
      process.removeListener('SIGTERM', this.shutdownHandler);
      process.removeListener('SIGINT', this.shutdownHandler);
      this.shutdownHandler = null;
    }

    this.pages.clear();
    this.pageAccess.clear();
  }

  /**
   * Register SIGTERM/SIGINT handlers that call shutdown().
   * No-op when the handlers are already registered for this instance.
   */
  setupGracefulShutdown() {
    if (this.shutdownHandler) {
      return;
    }

    this.shutdownHandler = async () => {
      console.log('StreamProcessor: Graceful shutdown initiated');
      await this.shutdown();
    };

    process.on('SIGTERM', this.shutdownHandler);
    process.on('SIGINT', this.shutdownHandler);
  }
}
672
+
673
+ export default StreamProcessor;