crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,673 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* StreamProcessor - Streaming data processing with memory monitoring and pagination
|
|
3
|
+
* Handles large datasets efficiently through streaming and chunked processing
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Readable, Transform, Writable, pipeline } from 'stream';
|
|
7
|
+
import { promisify } from 'util';
|
|
8
|
+
import { EventEmitter } from 'events';
|
|
9
|
+
import { config } from '../../constants/config.js';
|
|
10
|
+
|
|
11
|
+
// Promise-returning form of stream.pipeline, so a pipeline can be awaited.
const pipelineAsync = promisify(pipeline);
|
|
12
|
+
|
|
13
|
+
/**
 * Processes collections of items sequentially, in parallel batches, page by
 * page, or through Node.js streams, while sampling heap usage and keeping a
 * bounded in-memory cache of result pages.
 *
 * Events emitted on the processor: `itemProcessed`, `itemError`,
 * `pageProcessed`, `pageEvicted`, `itemWritten`, `writeError`,
 * `memoryPressure` and `shutdown`.
 */
export class StreamProcessor extends EventEmitter {
  /**
   * @param {Object} [options] - Processor configuration
   * @param {number} [options.chunkSize=1000] - Items per chunk in sequential mode
   * @param {number} [options.memoryLimit=104857600] - Heap-used threshold in bytes
   * @param {number} [options.memoryCheckInterval=5000] - Heap sampling period in ms
   * @param {number} [options.bufferHighWaterMark=16384] - Default highWaterMark for created streams
   * @param {boolean} [options.objectMode=false] - Default objectMode for created streams
   * @param {boolean} [options.enableBackpressure=true] - Stored on the instance; not read by this class
   * @param {number} [options.maxPagesInMemory=10] - Maximum number of cached pages
   * @param {number} [options.pageSize=100] - Items per page in pagination mode
   */
  constructor(options = {}) {
    super();

    const {
      chunkSize = 1000,
      memoryLimit = 100 * 1024 * 1024, // 100MB
      memoryCheckInterval = 5000,
      bufferHighWaterMark = 16 * 1024, // 16KB
      objectMode = false,
      enableBackpressure = true,
      maxPagesInMemory = 10,
      pageSize = 100
    } = options;

    this.chunkSize = chunkSize;
    this.memoryLimit = memoryLimit;
    this.memoryCheckInterval = memoryCheckInterval;
    this.bufferHighWaterMark = bufferHighWaterMark;
    this.objectMode = objectMode;
    this.enableBackpressure = enableBackpressure;
    this.maxPagesInMemory = maxPagesInMemory;
    this.pageSize = pageSize;

    // Memory monitoring
    this.memoryUsage = {
      current: 0,
      peak: 0,
      limit: this.memoryLimit,
      checkInterval: null
    };

    // Processing state
    this.isProcessing = false;
    this.processedItems = 0;
    this.totalItems = 0;
    this.startTime = null;
    this.endTime = null;

    // Pagination state
    this.pages = new Map();
    this.currentPage = 0;
    this.pageAccess = new Map(); // Track page access times for LRU

    // Signal listener registered by setupGracefulShutdown(); kept so that
    // shutdown() can remove it again.
    this.shutdownHandler = null;

    this.startMemoryMonitoring();
    this.setupGracefulShutdown();
  }

  /**
   * Turn the supported input kinds into an array.
   * Arrays are returned as-is, async iterables are drained with `for await`,
   * everything else goes through `Array.from`.
   * @param {Array|Iterable|AsyncIterable} data - Input collection
   * @returns {Promise<Array>} - Items as an array
   */
  async collectItems(data) {
    if (Array.isArray(data)) {
      return data;
    }

    const isAsyncIterable = data !== null && data !== undefined &&
      typeof data[Symbol.asyncIterator] === 'function';

    if (isAsyncIterable) {
      // Array.from() yields [] for async iterables, so drain them explicitly.
      const items = [];
      for await (const item of data) {
        items.push(item);
      }
      return items;
    }

    return Array.from(data);
  }

  /**
   * Process a dataset, dispatching to pagination, parallel or sequential mode.
   * Pagination takes precedence over `parallel` when both are set.
   * @param {Array|Iterable|AsyncIterable} data - Data to process
   * @param {Function} processor - Called as `processor(item, index)`
   * @param {Object} [options] - Processing options
   * @param {boolean} [options.parallel=false] - Process in concurrent batches
   * @param {number} [options.maxConcurrency=5] - Batch size in parallel mode
   * @param {number} [options.yieldEvery=100] - Sequential mode: yield to the event loop every N successes
   * @param {boolean} [options.enablePagination=false] - Store results page by page
   * @param {boolean} [options.collectResults=true] - Collect return values (not used by pagination mode)
   * @returns {Promise<Object>} - Processing results (pagination mode returns the paginated shape)
   */
  async processStream(data, processor, options = {}) {
    const {
      parallel = false,
      maxConcurrency = 5,
      yieldEvery = 100,
      enablePagination = false,
      collectResults = true
    } = options;

    this.isProcessing = true;
    this.startTime = Date.now();
    // Clear the previous run's end time so results built during this run
    // never measure against a stale timestamp.
    this.endTime = null;
    this.processedItems = 0;
    this.totalItems = 0;

    const results = collectResults ? [] : null;
    const errors = [];

    try {
      const items = await this.collectItems(data);
      this.totalItems = items.length;

      if (enablePagination) {
        return await this.processWithPagination(items, processor, options);
      } else if (parallel) {
        return await this.processParallel(items, processor, maxConcurrency, options);
      } else {
        return await this.processSequential(items, processor, yieldEvery, collectResults, results, errors);
      }
    } finally {
      this.isProcessing = false;
      this.endTime = Date.now();
    }
  }

  /**
   * Process data one item at a time, checking memory before and after each chunk.
   * A failing item is recorded in `errors` and processing continues.
   * `processedItems` counts successful items only.
   * @param {Array|Iterable} data - Data to process
   * @param {Function} processor - Called as `processor(item, index)`
   * @param {number} yieldEvery - Yield control every N successful items
   * @param {boolean} collectResults - Whether to collect results
   * @param {Array} results - Results array (appended to)
   * @param {Array} errors - Errors array (appended to)
   * @returns {Promise<Object>} - Processing results
   */
  async processSequential(data, processor, yieldEvery, collectResults, results, errors) {
    const chunks = this.createChunks(data, this.chunkSize);

    // Position of the current item in the input. Tracked separately from
    // processedItems, which does not advance when an item fails.
    let position = this.processedItems;

    for (const chunk of chunks) {
      await this.checkMemoryPressure();

      for (const item of chunk) {
        const index = position++;

        try {
          const result = await processor(item, index);

          if (collectResults && result !== undefined) {
            results.push(result);
          }

          this.processedItems++;

          // Yield control periodically
          if (this.processedItems % yieldEvery === 0) {
            await this.yield();
          }

          this.emit('itemProcessed', {
            index: this.processedItems,
            total: this.totalItems,
            result: collectResults ? result : undefined
          });
        } catch (error) {
          errors.push({
            index,
            item,
            error: error.message
          });

          this.emit('itemError', {
            index,
            error: error.message
          });
        }
      }

      // Check memory after each chunk
      await this.checkMemoryPressure();
    }

    return this.createProcessingResult(results, errors);
  }

  /**
   * Process data in batches of `maxConcurrency` items; items inside a batch
   * run concurrently and the next batch starts once the whole batch settled.
   * `processedItems` counts failed items as well.
   * @param {Array|Iterable} data - Data to process
   * @param {Function} processor - Called as `processor(item, index)`
   * @param {number} maxConcurrency - Maximum concurrent operations (must be > 0)
   * @param {Object} [options] - Processing options
   * @param {boolean} [options.collectResults=true] - Whether to collect results
   * @returns {Promise<Object>} - Processing results
   */
  async processParallel(data, processor, maxConcurrency, options = {}) {
    const { collectResults = true } = options;
    const dataArray = Array.isArray(data) ? data : Array.from(data);
    const chunks = this.createChunks(dataArray, maxConcurrency);

    const results = collectResults ? [] : null;
    const errors = [];

    for (const chunk of chunks) {
      await this.checkMemoryPressure();

      // processedItems only changes after the batch settled, so it is a
      // stable base for the indices of this batch.
      const baseIndex = this.processedItems;

      const outcomes = await Promise.all(chunk.map(async (item, offset) => {
        const index = baseIndex + offset;
        try {
          const result = await processor(item, index);
          return { success: true, result, index };
        } catch (error) {
          return { success: false, error: error.message, item, index };
        }
      }));

      for (const outcome of outcomes) {
        if (outcome.success) {
          if (collectResults && outcome.result !== undefined) {
            results.push(outcome.result);
          }

          this.emit('itemProcessed', {
            index: outcome.index,
            total: this.totalItems,
            result: collectResults ? outcome.result : undefined
          });
        } else {
          errors.push({
            index: outcome.index,
            item: outcome.item,
            error: outcome.error
          });

          this.emit('itemError', {
            index: outcome.index,
            error: outcome.error
          });
        }
      }

      this.processedItems += chunk.length;
    }

    return this.createProcessingResult(results, errors);
  }

  /**
   * Process data page by page and keep each page's results in the page cache.
   * The cache is cleared first so the returned `pages` only contains pages of
   * this run. Because the cache is bounded by `maxPagesInMemory` (and shrinks
   * further under memory pressure), `pages` may hold fewer than `totalPages`
   * entries.
   * @param {Array|Iterable} data - Data to process
   * @param {Function} processor - Called as `processor(item, globalIndex)`
   * @param {Object} options - Processing options (not read by this method)
   * @returns {Promise<Object>} - `{ totalItems, totalPages, pageSize, pages, errors }`
   * @throws {RangeError} If `pageSize` is not a positive number
   */
  async processWithPagination(data, processor, options) {
    if (!(this.pageSize > 0)) {
      // A non-positive page size would make the page loop never terminate.
      throw new RangeError(`pageSize must be a positive number, got ${this.pageSize}`);
    }

    const dataArray = Array.isArray(data) ? data : Array.from(data);
    const totalPages = Math.ceil(dataArray.length / this.pageSize);

    // Drop pages cached by an earlier run; their indices would otherwise be
    // mixed into this run's result.
    this.pages.clear();
    this.pageAccess.clear();

    const paginatedResult = {
      totalItems: dataArray.length,
      totalPages,
      pageSize: this.pageSize,
      pages: new Map(),
      errors: []
    };

    // Process each page
    for (let pageIndex = 0; pageIndex < totalPages; pageIndex++) {
      await this.checkMemoryPressure();

      const startIndex = pageIndex * this.pageSize;
      const endIndex = Math.min(startIndex + this.pageSize, dataArray.length);
      const pageData = dataArray.slice(startIndex, endIndex);

      const pageResults = [];
      const pageErrors = [];

      for (let i = 0; i < pageData.length; i++) {
        const globalIndex = startIndex + i;
        const item = pageData[i];

        try {
          const result = await processor(item, globalIndex);
          pageResults.push(result);

          this.emit('itemProcessed', {
            index: globalIndex,
            total: dataArray.length,
            page: pageIndex,
            result
          });
        } catch (error) {
          pageErrors.push({
            index: globalIndex,
            item,
            error: error.message
          });

          this.emit('itemError', {
            index: globalIndex,
            page: pageIndex,
            error: error.message
          });
        }
      }

      // Store page results
      this.storePage(pageIndex, {
        data: pageResults,
        errors: pageErrors,
        startIndex,
        endIndex,
        processedAt: Date.now()
      });

      this.processedItems += pageData.length;
      paginatedResult.errors.push(...pageErrors);

      this.emit('pageProcessed', {
        pageIndex,
        totalPages,
        itemsInPage: pageData.length,
        errorsInPage: pageErrors.length
      });
    }

    paginatedResult.pages = this.getAllPages();
    return paginatedResult;
  }

  /**
   * Create a transform stream that maps every chunk through `transformer`.
   *
   * With `parallel` enabled, up to `maxConcurrency` transformer calls run in
   * the background (output order is then not guaranteed); once that many are
   * in flight, further chunks are transformed inline. The stream only ends
   * after all background calls have pushed their results.
   *
   * `itemProcessed` / `itemError` are emitted on the processor. A failing
   * transformer fails the stream.
   * @param {Function} transformer - Called as `transformer(chunk, index)`
   * @param {Object} [options] - Stream options
   * @param {boolean} [options.objectMode] - Defaults to the processor's objectMode
   * @param {number} [options.highWaterMark] - Defaults to the processor's bufferHighWaterMark
   * @param {boolean} [options.parallel=false] - Allow background transformer calls
   * @param {number} [options.maxConcurrency=5] - Maximum background calls
   * @returns {Transform} - Transform stream
   */
  createTransformStream(transformer, options = {}) {
    const {
      objectMode = this.objectMode,
      highWaterMark = this.bufferHighWaterMark,
      parallel = false,
      maxConcurrency = 5
    } = options;

    let processedCount = 0;
    let pendingOperations = 0;
    // flush() callback parked until the background operations have settled.
    let flushCallback = null;

    // Arrow functions keep `this` pointing at the processor; the stream itself
    // is reached through the `stream` binding below.
    const runTransformer = async (chunk) => {
      const index = processedCount++;
      const result = await transformer(chunk, index);
      stream.push(result);
      this.emit('itemProcessed', { index: index + 1, result });
    };

    const operationSettled = () => {
      pendingOperations--;
      if (pendingOperations === 0 && flushCallback) {
        const done = flushCallback;
        flushCallback = null;
        done();
      }
    };

    const operationFailed = (error) => {
      pendingOperations--;
      flushCallback = null; // the stream is torn down instead of flushed
      this.emit('itemError', { index: processedCount, error: error.message });
      stream.destroy(error);
    };

    const stream = new Transform({
      objectMode,
      highWaterMark,
      transform: (chunk, encoding, callback) => {
        this.checkMemoryPressure()
          .then(() => {
            if (!parallel || pendingOperations >= maxConcurrency) {
              return runTransformer(chunk);
            }

            pendingOperations++;
            runTransformer(chunk).then(operationSettled, operationFailed);
            return undefined;
          })
          .then(
            () => callback(),
            (error) => {
              this.emit('itemError', { index: processedCount, error: error.message });
              callback(error);
            }
          );
      },
      flush: (callback) => {
        if (pendingOperations === 0) {
          callback();
          return;
        }
        flushCallback = callback;
      }
    });

    return stream;
  }

  /**
   * Create a readable stream that emits the items of `data` in order and then ends.
   * @param {Array|Iterable} data - Data to stream
   * @param {Object} [options] - Stream options
   * @param {boolean} [options.objectMode] - Defaults to the processor's objectMode
   * @param {number} [options.highWaterMark] - Defaults to the processor's bufferHighWaterMark
   * @returns {Readable} - Readable stream
   */
  createReadableStream(data, options = {}) {
    const {
      objectMode = this.objectMode,
      highWaterMark = this.bufferHighWaterMark
    } = options;

    const dataArray = Array.isArray(data) ? data : Array.from(data);
    let index = 0;

    return new Readable({
      objectMode,
      highWaterMark,
      read() {
        if (index < dataArray.length) {
          this.push(dataArray[index++]);
        } else {
          this.push(null); // End of stream
        }
      }
    });
  }

  /**
   * Create a writable stream that hands every chunk to `writer`.
   * `itemWritten` / `writeError` are emitted on the processor.
   * @param {Function} writer - Called as `writer(chunk, index)`
   * @param {Object} [options] - Stream options
   * @param {boolean} [options.objectMode] - Defaults to the processor's objectMode
   * @param {number} [options.highWaterMark] - Defaults to the processor's bufferHighWaterMark
   * @returns {Writable} - Writable stream
   */
  createWritableStream(writer, options = {}) {
    const {
      objectMode = this.objectMode,
      highWaterMark = this.bufferHighWaterMark
    } = options;

    let writeCount = 0;

    return new Writable({
      objectMode,
      highWaterMark,
      // Arrow function: `this` is the processor, as the original bind() did.
      write: async (chunk, encoding, callback) => {
        try {
          await this.checkMemoryPressure();
          await writer(chunk, writeCount++);
          this.emit('itemWritten', { index: writeCount, chunk });
          callback();
        } catch (error) {
          this.emit('writeError', { index: writeCount, error: error.message });
          callback(error);
        }
      }
    });
  }

  /**
   * Run `readable -> ...transforms -> writable` as one pipeline and wait for it.
   * @param {Readable} readable - Readable stream
   * @param {Array<Transform>} transforms - Transform streams
   * @param {Writable} writable - Writable stream
   * @returns {Promise<void>}
   */
  async processPipeline(readable, transforms, writable) {
    const streams = [readable, ...transforms, writable];

    this.isProcessing = true;
    this.startTime = Date.now();

    try {
      await pipelineAsync(...streams);
    } finally {
      this.isProcessing = false;
      this.endTime = Date.now();
    }
  }

  /**
   * Store page data and evict one page if the cache grew past `maxPagesInMemory`.
   * @param {number} pageIndex - Page index
   * @param {Object} pageData - Page data
   */
  storePage(pageIndex, pageData) {
    // Update access time
    this.pageAccess.set(pageIndex, Date.now());

    // Store page
    this.pages.set(pageIndex, pageData);

    // Evict pages if over limit
    if (this.pages.size > this.maxPagesInMemory) {
      this.evictOldestPage();
    }
  }

  /**
   * Get page data and refresh its access time.
   * @param {number} pageIndex - Page index
   * @returns {Object|null} - Page data, or null when the page is not cached
   */
  getPage(pageIndex) {
    if (this.pages.has(pageIndex)) {
      // Update access time
      this.pageAccess.set(pageIndex, Date.now());
      return this.pages.get(pageIndex);
    }
    return null;
  }

  /**
   * Get all stored pages.
   * @returns {Map} - Shallow copy of the page cache
   */
  getAllPages() {
    return new Map(this.pages);
  }

  /**
   * Evict the page with the oldest access time. When several pages share the
   * oldest timestamp, the one tracked first is evicted.
   */
  evictOldestPage() {
    let oldestPage = null;
    // Start from Infinity rather than "now": pages touched in the current
    // millisecond must still be eligible for eviction.
    let oldestTime = Infinity;

    for (const [pageIndex, accessTime] of this.pageAccess.entries()) {
      if (accessTime < oldestTime) {
        oldestTime = accessTime;
        oldestPage = pageIndex;
      }
    }

    if (oldestPage !== null) {
      this.pages.delete(oldestPage);
      this.pageAccess.delete(oldestPage);

      this.emit('pageEvicted', { pageIndex: oldestPage });
    }
  }

  /**
   * Split data into consecutive chunks of at most `chunkSize` items.
   * @param {Array|Iterable} data - Data to chunk
   * @param {number} chunkSize - Size of each chunk (must be > 0)
   * @returns {Array} - Array of chunks
   * @throws {RangeError} If `chunkSize` is not a positive number
   */
  createChunks(data, chunkSize) {
    if (!(chunkSize > 0)) {
      // Zero, negative or NaN sizes would never advance the loop below.
      throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
    }

    const dataArray = Array.isArray(data) ? data : Array.from(data);
    const chunks = [];

    for (let i = 0; i < dataArray.length; i += chunkSize) {
      chunks.push(dataArray.slice(i, i + chunkSize));
    }

    return chunks;
  }

  /**
   * Sample heap usage; when it exceeds `memoryLimit`, emit `memoryPressure`,
   * run the garbage collector if it is exposed, evict part of the page cache
   * and yield to the event loop.
   * @returns {Promise<void>}
   */
  async checkMemoryPressure() {
    const memUsage = process.memoryUsage();
    this.memoryUsage.current = memUsage.heapUsed;
    this.memoryUsage.peak = Math.max(this.memoryUsage.peak, memUsage.heapUsed);

    if (memUsage.heapUsed > this.memoryLimit) {
      this.emit('memoryPressure', {
        current: memUsage.heapUsed,
        limit: this.memoryLimit,
        percentage: (memUsage.heapUsed / this.memoryLimit) * 100
      });

      // Force garbage collection if available
      if (global.gc) {
        global.gc();
      }

      // Evict some pages if using pagination
      if (this.pages.size > this.maxPagesInMemory / 2) {
        const pagesToEvict = Math.ceil(this.pages.size / 4);
        for (let i = 0; i < pagesToEvict; i++) {
          this.evictOldestPage();
        }
      }

      // Yield control to allow other operations
      await this.yield();
    }
  }

  /**
   * Start periodic memory sampling (no-op when already running).
   */
  startMemoryMonitoring() {
    if (this.memoryUsage.checkInterval) {
      return; // Already monitoring
    }

    const timer = setInterval(() => {
      this.checkMemoryPressure();
    }, this.memoryCheckInterval);

    // The sampler alone should not keep the process alive.
    if (typeof timer.unref === 'function') {
      timer.unref();
    }

    this.memoryUsage.checkInterval = timer;
  }

  /**
   * Stop periodic memory sampling.
   */
  stopMemoryMonitoring() {
    if (this.memoryUsage.checkInterval) {
      clearInterval(this.memoryUsage.checkInterval);
      this.memoryUsage.checkInterval = null;
    }
  }

  /**
   * Yield control to the event loop.
   * @returns {Promise<void>}
   */
  async yield() {
    return new Promise(resolve => setImmediate(resolve));
  }

  /**
   * Build the summary object returned by sequential and parallel processing.
   * While a run is still in progress (or no end time was recorded) the
   * duration is measured up to the current time.
   * @param {Array|null} results - Processing results
   * @param {Array} errors - Processing errors
   * @returns {Object} - Result object
   */
  createProcessingResult(results, errors) {
    const finishedAt = this.isProcessing || !this.endTime ? Date.now() : this.endTime;
    const startedAt = this.startTime || finishedAt;
    const duration = Math.max(0, finishedAt - startedAt);
    // A zero-length run has no meaningful rate; report 0 instead of Infinity/NaN.
    const itemsPerSecond = duration > 0 ? this.processedItems / (duration / 1000) : 0;

    return {
      processedItems: this.processedItems,
      totalItems: this.totalItems,
      results: results || [],
      errors,
      duration,
      itemsPerSecond: Math.round(itemsPerSecond * 100) / 100,
      memoryUsage: {
        peak: this.memoryUsage.peak,
        current: this.memoryUsage.current,
        limit: this.memoryUsage.limit
      },
      success: errors.length === 0
    };
  }

  /**
   * Get processing statistics.
   * @returns {Object} - Statistics
   */
  getStats() {
    const now = Date.now();
    const duration = this.isProcessing ?
      now - this.startTime :
      (this.endTime || now) - (this.startTime || now);

    return {
      isProcessing: this.isProcessing,
      processedItems: this.processedItems,
      totalItems: this.totalItems,
      duration,
      // Guard against 0/0 (NaN) and n/0 (Infinity) for runs without a duration.
      itemsPerSecond: duration > 0 ? this.processedItems / (duration / 1000) : 0,
      memoryUsage: this.memoryUsage,
      pagesInMemory: this.pages.size,
      maxPagesInMemory: this.maxPagesInMemory
    };
  }

  /**
   * Reset counters, timestamps, the page cache and the memory readings.
   */
  reset() {
    this.processedItems = 0;
    this.totalItems = 0;
    this.startTime = null;
    this.endTime = null;
    this.pages.clear();
    this.pageAccess.clear();
    this.currentPage = 0;

    // Reset memory tracking
    this.memoryUsage.current = 0;
    this.memoryUsage.peak = 0;
  }

  /**
   * Graceful shutdown: emit `shutdown`, stop memory sampling, remove the
   * signal listeners registered by this instance and clear the page cache.
   * @returns {Promise<void>}
   */
  async shutdown() {
    this.emit('shutdown');

    this.stopMemoryMonitoring();

    if (this.shutdownHandler) {
      process.removeListener('SIGTERM', this.shutdownHandler);
      process.removeListener('SIGINT', this.shutdownHandler);
      this.shutdownHandler = null;
    }

    this.pages.clear();
    this.pageAccess.clear();
  }

  /**
   * Register SIGTERM / SIGINT listeners that shut this processor down.
   * Calling it again while the listeners are registered is a no-op.
   */
  setupGracefulShutdown() {
    if (this.shutdownHandler) {
      return; // Already registered
    }

    this.shutdownHandler = async () => {
      console.log('StreamProcessor: Graceful shutdown initiated');
      await this.shutdown();
    };

    process.on('SIGTERM', this.shutdownHandler);
    process.on('SIGINT', this.shutdownHandler);
  }
}
|
|
672
|
+
|
|
673
|
+
// Default export of the StreamProcessor class, in addition to its named export.
export default StreamProcessor;
|