crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,828 @@
1
+ /**
2
+ * PerformanceManager - Orchestrates performance optimization components
3
+ * Integrates WorkerPool, ConnectionPool, StreamProcessor, and QueueManager
4
+ */
5
+
6
+ import { EventEmitter } from 'events';
7
+ import WorkerPool from './workers/WorkerPool.js';
8
+ import ConnectionPool from './connections/ConnectionPool.js';
9
+ import StreamProcessor from './processing/StreamProcessor.js';
10
+ import QueueManager from './queue/QueueManager.js';
11
+ import { config } from '../constants/config.js';
12
+
13
export class PerformanceManager extends EventEmitter {
  /**
   * Build the manager and its four components, wire their events through,
   * and (optionally) start periodic metrics collection.
   *
   * @param {Object} [options]
   * @param {Object} [options.workerPoolOptions] - Overrides passed to WorkerPool
   * @param {Object} [options.connectionPoolOptions] - Overrides passed to ConnectionPool
   * @param {Object} [options.streamProcessorOptions] - Overrides passed to StreamProcessor
   * @param {Object} [options.queueManagerOptions] - Overrides passed to QueueManager
   * @param {boolean} [options.enableMetrics=true] - Whether to start the metrics timer
   * @param {number} [options.metricsInterval=10000] - Metrics timer period in milliseconds
   */
  constructor(options = {}) {
    super();

    const {
      workerPoolOptions = {},
      connectionPoolOptions = {},
      streamProcessorOptions = {},
      queueManagerOptions = {},
      enableMetrics = true,
      metricsInterval = 10000
    } = options;

    this.enableMetrics = enableMetrics;
    this.metricsInterval = metricsInterval;

    // Initialize performance components (caller-supplied options win over defaults)
    this.workerPool = new WorkerPool({
      maxWorkers: config.performance.maxWorkers,
      ...workerPoolOptions
    });

    this.connectionPool = new ConnectionPool({
      maxSockets: config.performance.maxWorkers * 2,
      ...connectionPoolOptions
    });

    this.streamProcessor = new StreamProcessor({
      chunkSize: 1000,
      memoryLimit: 100 * 1024 * 1024, // 100MB
      ...streamProcessorOptions
    });

    this.queueManager = new QueueManager({
      concurrency: config.performance.queueConcurrency,
      ...queueManagerOptions
    });

    // Performance metrics
    this.metrics = this._createInitialMetrics();

    // Task routing configuration
    this.taskRouting = {
      // CPU-intensive tasks go to worker pool
      parseHtml: 'worker',
      extractContent: 'worker',
      analyzeText: 'worker',
      processStructuredData: 'worker',
      calculateSimilarity: 'worker',

      // I/O tasks go to connection pool
      fetchUrl: 'connection',
      downloadFile: 'connection',
      validateUrls: 'connection',

      // Large data processing goes to stream processor
      processBatch: 'stream',
      processLargeDataset: 'stream',
      transformData: 'stream',

      // Standard tasks go to queue manager
      default: 'queue'
    };

    this.setupEventHandlers();
    this.startMetricsCollection();
  }

  /**
   * Execute a task using the optimal performance component.
   * Counts the operation in the metrics and emits `taskCompleted` or `taskFailed`.
   *
   * @param {string} taskType - Type of task
   * @param {any} data - Task data
   * @param {Object} options - Task options
   * @returns {Promise<any>} - Task result
   * @throws Re-throws whatever the selected component rejected with
   */
  async executeTask(taskType, data, options = {}) {
    const startTime = Date.now();
    this.metrics.totalOperations++;

    try {
      const component = this.getOptimalComponent(taskType, data, options);
      const result = await this.executeOnComponent(component, taskType, data, options);

      const duration = Date.now() - startTime;
      this.updateSuccessMetrics(duration);

      this.emit('taskCompleted', {
        taskType,
        component,
        duration,
        dataSize: this.getDataSize(data)
      });

      return result;
    } catch (error) {
      this.metrics.failedOperations++;

      this.emit('taskFailed', {
        taskType,
        error: error && error.message,
        duration: Date.now() - startTime
      });

      throw error;
    }
  }

  /**
   * Execute multiple tasks with intelligent distribution.
   *
   * An explicit `parallel`, `sequential` or `mixed` strategy is always honoured.
   * Any other strategy (including the default `auto`) uses similarity grouping
   * when both `enableOptimization` and `groupBySimilarity` are true, and the
   * plain auto batch otherwise. Results are returned in input order whenever
   * one result per task was produced.
   *
   * @param {Array} tasks - Array of {taskType, data, options} objects
   * @param {Object} batchOptions - Batch execution options
   * @returns {Promise<Array>} - Array of results
   */
  async executeBatch(tasks, batchOptions = {}) {
    const {
      strategy = 'auto', // 'auto', 'parallel', 'sequential', 'mixed'
      enableOptimization = true,
      groupBySimilarity = true
    } = batchOptions;

    // Explicit strategies must not be shadowed by the (default-on) grouping path.
    switch (strategy) {
      case 'parallel':
        return await this.executeParallelBatch(tasks, batchOptions);
      case 'sequential':
        return await this.executeSequentialBatch(tasks, batchOptions);
      case 'mixed':
        return await this.executeMixedBatch(tasks, batchOptions);
      default:
        break;
    }

    if (enableOptimization && groupBySimilarity) {
      const groupedTasks = this.groupTasksBySimilarity(tasks);
      const groupedResults = await this.executeBatchGroups(groupedTasks, batchOptions);
      // Groups run one after another, so results come back in group order.
      return this._restoreInputOrder(tasks, groupedTasks, groupedResults);
    }

    return await this.executeAutoBatch(tasks, batchOptions);
  }

  /**
   * Process large dataset using stream processing.
   *
   * With `enableWorkerPool` (default), items that `isCpuIntensive` flags are
   * sent to the worker pool as a `processItem` task instead of being passed to
   * `processor`; all other items go through `processor`.
   *
   * @param {Array|AsyncIterable} data - Data to process
   * @param {Function} processor - Processing function
   * @param {Object} options - Processing options
   * @returns {Promise<Object>} - Processing results
   */
  async processLargeDataset(data, processor, options = {}) {
    const {
      enableWorkerPool = true,
      enablePagination = true,
      chunkSize = 1000
    } = options;

    if (!enableWorkerPool) {
      return await this.streamProcessor.processStream(data, processor, options);
    }

    // Enhance processor to use worker pool for CPU-intensive operations
    const enhancedProcessor = async (item, index) => {
      if (this.isCpuIntensive(item)) {
        return await this.workerPool.execute('processItem', item, { timeout: 30000 });
      }
      return await processor(item, index);
    };

    return await this.streamProcessor.processStream(data, enhancedProcessor, {
      ...options,
      enablePagination,
      chunkSize
    });
  }

  /**
   * Get optimal component for task execution.
   *
   * @param {string} taskType - Type of task
   * @param {any} data - Task data
   * @param {Object} options - Task options (currently unused)
   * @returns {string} - Component name: 'worker', 'connection', 'stream' or 'queue'
   */
  getOptimalComponent(taskType, data, options) {
    // Check explicit routing first (own keys only, so names such as
    // 'toString' or 'constructor' do not resolve to inherited functions)
    const explicitRoute = this._lookupRoute(taskType);
    if (explicitRoute) {
      return explicitRoute;
    }

    // Auto-select based on task characteristics
    const dataSize = this.getDataSize(data);
    const isLargeDataset = dataSize > 10 * 1024 * 1024; // 10MB
    const isCpuIntensive = this.isCpuIntensive(data);
    const isNetworkOperation = this.isNetworkOperation(taskType);

    // NOTE(review): isCpuIntensive is derived from the same size estimate with a
    // lower (100KB) threshold, so a large dataset is always CPU-intensive and
    // this 'stream' branch cannot currently be reached. Confirm intended order.
    if (isLargeDataset && !isCpuIntensive) {
      return 'stream';
    } else if (isCpuIntensive) {
      return 'worker';
    } else if (isNetworkOperation) {
      return 'connection';
    } else {
      return 'queue';
    }
  }

  /**
   * Execute task on specific component.
   *
   * Tasks that fall back to the standard path are enqueued exactly once here;
   * metrics for them are expected to be recorded by the caller (executeTask).
   *
   * @param {string} component - Component name
   * @param {string} taskType - Task type
   * @param {any} data - Task data
   * @param {Object} options - Task options
   * @returns {Promise<any>} - Task result
   */
  async executeOnComponent(component, taskType, data, options = {}) {
    switch (component) {
      case 'worker':
        return await this.workerPool.execute(taskType, data, options);

      case 'connection':
        if (taskType === 'fetchUrl' || taskType === 'downloadFile') {
          return await this.connectionPool.request(this._buildRequest(data, options));
        }
        return await this.queueManager.add(async () => {
          return await this.executeNetworkTask(taskType, data, options);
        });

      case 'stream':
        // Guard against null/undefined data before probing for the iterator
        if (Array.isArray(data) || (data != null && data[Symbol.asyncIterator])) {
          return await this.streamProcessor.processStream(
            data,
            options.processor || (item => item),
            options
          );
        }
        return await this._enqueueStandardTask(taskType, data, options);

      case 'queue':
      default:
        return await this._enqueueStandardTask(taskType, data, options);
    }
  }

  /**
   * Execute auto-optimized batch: tasks are grouped by their optimal component,
   * each group is run through that component, and results are put back into
   * input order when one result per task was produced.
   *
   * @param {Array} tasks - Tasks to execute
   * @param {Object} options - Batch options (currently unused)
   * @returns {Promise<Array>} - Results
   */
  async executeAutoBatch(tasks, options) {
    const groupedTasks = this.groupTasksByComponent(tasks);
    const results = [];

    // Execute each group with its optimal component
    for (const [component, componentTasks] of groupedTasks.entries()) {
      let componentResults;

      switch (component) {
        case 'worker':
          componentResults = await this.workerPool.executeBatch(
            componentTasks.map(t => ({ taskType: t.taskType, data: t.data, options: t.options })),
            { maxConcurrent: this.workerPool.maxWorkers }
          );
          break;

        case 'connection':
          componentResults = await this.connectionPool.requestBatch(
            componentTasks.map(t => this._buildRequest(t.data, t.options)),
            { maxConcurrent: this.connectionPool.maxSockets * 0.8 }
          );
          break;

        case 'stream':
          // For stream tasks, process each one individually
          componentResults = [];
          for (const task of componentTasks) {
            const taskOptions = task.options || {};
            const result = await this.streamProcessor.processStream(
              task.data,
              taskOptions.processor || (item => item),
              taskOptions
            );
            componentResults.push(result);
          }
          break;

        default:
          // Each job is already inside the queue, so the standard task must not
          // enqueue itself a second time.
          componentResults = await this.queueManager.addAll(
            componentTasks.map(t => () => this._runStandardTask(
              t.taskType,
              t.data,
              t.options,
              { alreadyQueued: true, trackMetrics: true }
            ))
          );
      }

      results.push(...componentResults);
    }

    return this._restoreInputOrder(tasks, groupedTasks.values(), results);
  }

  /**
   * Group tasks by optimal component.
   *
   * @param {Array} tasks - Tasks to group
   * @returns {Map} - Component name -> tasks, in first-seen order
   */
  groupTasksByComponent(tasks) {
    const groups = new Map();

    for (const task of tasks) {
      const component = this.getOptimalComponent(task.taskType, task.data, task.options);
      const bucket = groups.get(component);

      if (bucket) {
        bucket.push(task);
      } else {
        groups.set(component, [task]);
      }
    }

    return groups;
  }

  /**
   * Group tasks by similarity for optimization. Two tasks are similar when
   * they share both task type and optimal component.
   *
   * @param {Array} tasks - Tasks to group
   * @returns {Array} - Array of task arrays, in first-seen order
   */
  groupTasksBySimilarity(tasks) {
    const taskMap = new Map();

    for (const task of tasks) {
      const key = `${task.taskType}_${this.getOptimalComponent(task.taskType, task.data, task.options)}`;
      const bucket = taskMap.get(key);

      if (bucket) {
        bucket.push(task);
      } else {
        taskMap.set(key, [task]);
      }
    }

    return Array.from(taskMap.values());
  }

  /**
   * Execute batch groups one after another via executeAutoBatch.
   *
   * @param {Array} groups - Task groups
   * @param {Object} options - Batch options
   * @returns {Promise<Array>} - Results, concatenated in group order
   */
  async executeBatchGroups(groups, options) {
    const results = [];

    for (const group of groups) {
      const groupResults = await this.executeAutoBatch(group, options);
      results.push(...groupResults);
    }

    return results;
  }

  /**
   * Execute parallel batch: tasks run in chunks of `maxConcurrency`, each chunk
   * awaited with Promise.all before the next starts.
   *
   * @param {Array} tasks - Tasks to execute
   * @param {Object} options - Batch options
   * @returns {Promise<Array>} - Results
   * @throws {RangeError} When maxConcurrency is not a positive number
   */
  async executeParallelBatch(tasks, options = {}) {
    const { maxConcurrency = config.performance.maxWorkers } = options;
    const results = [];

    for (const chunk of this.chunkArray(tasks, maxConcurrency)) {
      const chunkResults = await Promise.all(
        chunk.map(task => this.executeTask(task.taskType, task.data, task.options))
      );
      results.push(...chunkResults);
    }

    return results;
  }

  /**
   * Execute sequential batch: each task is awaited before the next starts.
   *
   * @param {Array} tasks - Tasks to execute
   * @param {Object} options - Batch options (currently unused)
   * @returns {Promise<Array>} - Results
   */
  async executeSequentialBatch(tasks, options) {
    const results = [];

    for (const task of tasks) {
      results.push(await this.executeTask(task.taskType, task.data, task.options));
    }

    return results;
  }

  /**
   * Execute mixed batch (some parallel, some sequential). Parallel-safe tasks
   * run first as a parallel batch, the remainder run sequentially afterwards.
   *
   * @param {Array} tasks - Tasks to execute
   * @param {Object} options - Batch options
   * @returns {Promise<Array>} - Results in original task order
   */
  async executeMixedBatch(tasks, options) {
    // Classify each task once and reuse the answer for splitting and merging
    const parallelFlags = tasks.map(task => this.canRunInParallel(task));
    const parallelTasks = tasks.filter((task, index) => parallelFlags[index]);
    const sequentialTasks = tasks.filter((task, index) => !parallelFlags[index]);

    const parallelResults = await this.executeParallelBatch(parallelTasks, options);
    const sequentialResults = await this.executeSequentialBatch(sequentialTasks, options);

    // Merge results maintaining original order
    let parallelIndex = 0;
    let sequentialIndex = 0;

    return parallelFlags.map(isParallel => (
      isParallel ? parallelResults[parallelIndex++] : sequentialResults[sequentialIndex++]
    ));
  }

  /**
   * Check if task can run in parallel.
   *
   * @param {Object} task - Task object
   * @returns {boolean} - Whether task can run in parallel
   */
  canRunInParallel(task) {
    const parallelSafeTasks = ['parseHtml', 'extractContent', 'analyzeText', 'fetchUrl'];
    return parallelSafeTasks.includes(task.taskType);
  }

  /**
   * Execute network task. Only `validateUrls` is implemented.
   *
   * @param {string} taskType - Task type
   * @param {any} data - Task data
   * @param {Object} options - Task options
   * @returns {Promise<any>} - Task result
   * @throws {Error} For any other task type
   */
  async executeNetworkTask(taskType, data, options) {
    if (taskType === 'validateUrls') {
      return await this.validateUrls(data, options);
    }
    throw new Error(`Unknown network task type: ${taskType}`);
  }

  /**
   * Execute standard task: route by `taskRouting` (falling back to the default
   * route), run the task, and record success/failure metrics.
   *
   * @param {string} taskType - Task type
   * @param {any} data - Task data
   * @param {Object} options - Task options
   * @returns {Promise<any>} - Task result
   * @throws {Error} Wrapping the underlying failure (available as `cause`)
   */
  async executeStandardTask(taskType, data, options = {}) {
    return await this._runStandardTask(taskType, data, options, {
      alreadyQueued: false,
      trackMetrics: true
    });
  }

  /**
   * Shared implementation behind executeStandardTask.
   *
   * @private
   * @param {string} taskType - Task type
   * @param {any} data - Task data
   * @param {Object} [options] - Task options
   * @param {Object} [execution]
   * @param {boolean} [execution.alreadyQueued=false] - True when the caller is
   *   already running inside a queueManager job; queue-routed work then runs
   *   inline instead of being enqueued again (a job waiting on a nested job
   *   holds its slot and can deadlock a bounded queue).
   * @param {boolean} [execution.trackMetrics=true] - False when the caller
   *   (executeTask) records the metrics itself, to avoid double counting.
   * @returns {Promise<any>} - Task result
   */
  async _runStandardTask(taskType, data, options = {}, execution = {}) {
    const { alreadyQueued = false, trackMetrics = true } = execution;
    const startTime = Date.now();

    if (trackMetrics) {
      // Keep totalOperations >= completedOperations + failedOperations
      this.metrics.totalOperations++;
    }

    try {
      // Determine which component should handle this task
      const routingKey = this._lookupRoute(taskType) || this.taskRouting.default;
      let result;

      switch (routingKey) {
        case 'worker':
          // CPU-intensive tasks go to worker pool. Positional arguments, matching
          // the other workerPool.execute call sites in this class.
          result = await this.workerPool.execute(taskType, data, options);
          break;

        case 'connection':
          // I/O tasks use connection pool
          result = await this._executeIOTask(taskType, data, options);
          break;

        case 'stream':
          // Large data processing tasks
          // NOTE(review): every other call site uses streamProcessor.processStream;
          // confirm StreamProcessor also exposes process().
          result = await this.streamProcessor.process(data, {
            taskType,
            ...options
          });
          break;

        case 'queue':
        default:
          // Standard queue processing
          if (alreadyQueued) {
            result = await this._executeGenericTask(taskType, data, options);
          } else {
            result = await this.queueManager.add(async () => {
              return await this._executeGenericTask(taskType, data, options);
            }, options);
          }
          break;
      }

      if (trackMetrics) {
        this.updateSuccessMetrics(Date.now() - startTime);
      }

      return result;
    } catch (error) {
      if (trackMetrics) {
        this.metrics.failedOperations++;
      }
      const reason = error && error.message;
      // Keep the original error reachable for callers that need the stack
      throw new Error(`Task execution failed for ${taskType}: ${reason}`, { cause: error });
    }
  }

  /**
   * Enqueue a standard task exactly once, leaving metrics to the caller.
   *
   * @private
   */
  _enqueueStandardTask(taskType, data, options) {
    return this.queueManager.add(async () => {
      return await this._runStandardTask(taskType, data, options, {
        alreadyQueued: true,
        trackMetrics: false
      });
    }, options);
  }

  /**
   * Look up an explicit route for a task type, ignoring inherited properties.
   *
   * @private
   * @returns {string|undefined} - Route name, or undefined when none is configured
   */
  _lookupRoute(taskType) {
    return Object.prototype.hasOwnProperty.call(this.taskRouting, taskType)
      ? this.taskRouting[taskType]
      : undefined;
  }

  /**
   * Build a connection-pool request from task data, which may be a URL string
   * or an object with url/method/headers. Task options override these fields.
   *
   * @private
   */
  _buildRequest(data, options) {
    const source = data != null && typeof data === 'object' ? data : {};

    return {
      url: source.url || data,
      method: source.method || 'GET',
      headers: source.headers || {},
      ...options
    };
  }

  /**
   * Put results produced in group order back into input order.
   * Returns `results` untouched unless there is exactly one result per task
   * and every executed task can be matched to an input position.
   *
   * @private
   * @param {Array} tasks - Tasks in input order
   * @param {Iterable<Array>} executionGroups - Task groups in execution order
   * @param {Array} results - Results in execution order
   * @returns {Array} - Results in input order when that can be determined
   */
  _restoreInputOrder(tasks, executionGroups, results) {
    if (!Array.isArray(tasks) || !Array.isArray(results) || results.length !== tasks.length) {
      return results;
    }

    // A task object may appear more than once, so keep a list of positions
    const positions = new Map();
    tasks.forEach((task, index) => {
      const pending = positions.get(task);
      if (pending) {
        pending.push(index);
      } else {
        positions.set(task, [index]);
      }
    });

    const ordered = new Array(tasks.length);
    let cursor = 0;

    for (const group of executionGroups) {
      for (const task of group) {
        const pending = positions.get(task);
        if (!pending || pending.length === 0) {
          return results;
        }
        ordered[pending.shift()] = results[cursor++];
      }
    }

    return cursor === tasks.length ? ordered : results;
  }

  /**
   * Create a zeroed metrics record stamped with the current time.
   *
   * @private
   */
  _createInitialMetrics() {
    return {
      startTime: Date.now(),
      totalOperations: 0,
      completedOperations: 0,
      failedOperations: 0,
      avgOperationTime: 0,
      memoryUsage: {
        current: 0,
        peak: 0
      },
      componentStats: {}
    };
  }

  /**
   * Execute I/O intensive task using connection pool.
   * Resolves with the request's 'response' event payload and rejects with its
   * 'error' event payload.
   *
   * @private
   */
  async _executeIOTask(taskType, data, options) {
    return new Promise((resolve, reject) => {
      // NOTE(review): the rest of this class talks to the pool through
      // request()/requestBatch(); confirm ConnectionPool exposes createRequest().
      const request = this.connectionPool.createRequest({
        taskType,
        data,
        options
      });

      request.on('response', resolve);
      request.on('error', reject);

      request.end();
    });
  }

  /**
   * Execute generic task.
   *
   * @private
   * @throws {TypeError} From `new URL` when a 'validateUrl' input is not a valid URL
   */
  async _executeGenericTask(taskType, data, options) {
    // Basic task execution for common operations
    switch (taskType) {
      case 'validateUrl': {
        const url = new URL(data);
        return { valid: true, url: url.href };
      }

      case 'normalizeData':
        return Array.isArray(data) ? data.filter(Boolean) : data;

      case 'calculateMetrics': {
        // JSON.stringify yields undefined for e.g. undefined or a function
        const serialized = JSON.stringify(data);
        return {
          size: serialized === undefined ? 0 : serialized.length,
          timestamp: Date.now(),
          ...options
        };
      }

      default:
        // For unknown task types, return the data as-is with metadata
        return {
          taskType,
          data,
          processed: true,
          timestamp: Date.now()
        };
    }
  }

  /**
   * Validate URLs by parsing each one with the URL constructor.
   *
   * @param {Array|string} urls - URLs to validate; a single string is treated
   *   as one URL and null/undefined as an empty list
   * @param {Object} options - Validation options (currently unused)
   * @returns {Promise<Object>} - `{ valid: string[], invalid: {url, error}[] }`
   */
  async validateUrls(urls, options) {
    const results = { valid: [], invalid: [] };

    let candidates;
    if (urls == null) {
      candidates = [];
    } else if (typeof urls === 'string') {
      // Iterating a bare string would validate it character by character
      candidates = [urls];
    } else {
      candidates = urls;
    }

    for (const url of candidates) {
      try {
        new URL(url);
        results.valid.push(url);
      } catch (error) {
        results.invalid.push({ url, error: error.message });
      }
    }

    return results;
  }

  /**
   * Get data size estimate.
   * Strings are measured in UTF-8 bytes, arrays are estimated at 100 per
   * element, other objects by the length of their JSON text. Anything else,
   * and any object that cannot be serialized, counts as 0.
   *
   * @param {any} data - Data to measure
   * @returns {number} - Estimated size
   */
  getDataSize(data) {
    if (typeof data === 'string') {
      return Buffer.byteLength(data, 'utf8');
    }

    if (Array.isArray(data)) {
      return data.length * 100; // Rough estimate
    }

    if (typeof data === 'object' && data !== null) {
      try {
        const serialized = JSON.stringify(data);
        return serialized === undefined ? 0 : serialized.length;
      } catch (serializationError) {
        // Circular references and BigInt values make JSON.stringify throw;
        // a size estimate must not fail the task that asked for it.
        return 0;
      }
    }

    return 0;
  }

  /**
   * Check if task is CPU intensive, judged purely by estimated data size.
   *
   * @param {any} data - Task data
   * @returns {boolean} - Whether task is CPU intensive
   */
  isCpuIntensive(data) {
    return this.getDataSize(data) > 100 * 1024; // 100KB threshold
  }

  /**
   * Check if task is network operation.
   *
   * @param {string} taskType - Task type
   * @returns {boolean} - Whether task is network operation
   */
  isNetworkOperation(taskType) {
    const networkTasks = ['fetchUrl', 'downloadFile', 'validateUrls', 'checkConnectivity'];
    return networkTasks.includes(taskType);
  }

  /**
   * Chunk array into smaller arrays.
   *
   * @param {Array} array - Array to chunk
   * @param {number} chunkSize - Size of each chunk
   * @returns {Array} - Array of chunks
   * @throws {RangeError} When chunkSize is not a positive number (a size of 0
   *   or less would never advance the loop; NaN would silently drop data)
   */
  chunkArray(array, chunkSize) {
    if (!(chunkSize > 0)) {
      throw new RangeError(`chunkSize must be a positive number, received: ${chunkSize}`);
    }

    const chunks = [];
    for (let start = 0; start < array.length; start += chunkSize) {
      chunks.push(array.slice(start, start + chunkSize));
    }
    return chunks;
  }

  /**
   * Update success metrics: bump the completed count and fold the duration
   * into the running average.
   *
   * @param {number} duration - Operation duration
   */
  updateSuccessMetrics(duration) {
    this.metrics.completedOperations++;

    const total = this.metrics.completedOperations;
    this.metrics.avgOperationTime = (
      (this.metrics.avgOperationTime * (total - 1) + duration) / total
    );
  }

  /**
   * Setup event handlers for all components: each component event is re-emitted
   * on this manager under a component-prefixed name.
   */
  setupEventHandlers() {
    const forward = (source, sourceEvent, targetEvent) => {
      source.on(sourceEvent, (data) => {
        this.emit(targetEvent, data);
      });
    };

    // Worker pool events
    forward(this.workerPool, 'taskCompleted', 'workerTaskCompleted');
    forward(this.workerPool, 'taskFailed', 'workerTaskFailed');

    // Connection pool events
    forward(this.connectionPool, 'requestCompleted', 'connectionRequestCompleted');
    forward(this.connectionPool, 'requestFailed', 'connectionRequestFailed');
    forward(this.connectionPool, 'backpressureActivated', 'connectionBackpressure');

    // Stream processor events
    forward(this.streamProcessor, 'itemProcessed', 'streamItemProcessed');
    forward(this.streamProcessor, 'memoryPressure', 'streamMemoryPressure');

    // Queue manager events (re-emitted without the original payload)
    this.queueManager.queue.on('completed', () => {
      this.emit('queueTaskCompleted');
    });

    this.queueManager.queue.on('error', (error) => {
      this.emit('queueTaskFailed', { error: error.message });
    });
  }

  /**
   * Start metrics collection. Does nothing when metrics are disabled.
   */
  startMetricsCollection() {
    if (!this.enableMetrics) return;

    // Starting twice must not leave an orphaned interval behind
    if (this.metricsTimer) {
      clearInterval(this.metricsTimer);
    }

    this.metricsTimer = setInterval(() => {
      this.collectMetrics();
    }, this.metricsInterval);

    // Metrics collection alone should not keep the process alive
    if (typeof this.metricsTimer.unref === 'function') {
      this.metricsTimer.unref();
    }
  }

  /**
   * Collect metrics from all components and emit `metricsCollected`.
   */
  collectMetrics() {
    const { heapUsed } = process.memoryUsage();

    this.metrics.memoryUsage = {
      current: heapUsed,
      peak: Math.max(this.metrics.memoryUsage.peak || 0, heapUsed)
    };

    this.metrics.componentStats = {
      workerPool: this.workerPool.getStats(),
      connectionPool: this.connectionPool.getStats(),
      streamProcessor: this.streamProcessor.getStats(),
      queueManager: this.queueManager.getStats()
    };

    this.emit('metricsCollected', this.metrics);
  }

  /**
   * Get comprehensive performance metrics.
   *
   * @returns {Object} - Current metrics plus `uptime` (ms) and
   *   `operationsPerSecond` (0 until some time has elapsed)
   */
  getMetrics() {
    this.collectMetrics(); // Get latest metrics

    const uptime = Date.now() - this.metrics.startTime;

    return {
      ...this.metrics,
      uptime,
      // Avoid NaN/Infinity when called within the same millisecond as startTime
      operationsPerSecond: uptime > 0
        ? this.metrics.completedOperations / (uptime / 1000)
        : 0
    };
  }

  /**
   * Reset all performance metrics.
   */
  resetMetrics() {
    this.metrics = this._createInitialMetrics();
  }

  /**
   * Graceful shutdown: stops the metrics timer, then shuts down the worker
   * pool, connection pool and stream processor in parallel. Emits `shutdown`
   * before and `shutdownComplete` after.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    this.emit('shutdown');

    // Stop metrics collection
    if (this.metricsTimer) {
      clearInterval(this.metricsTimer);
      this.metricsTimer = null;
    }

    // Shutdown all components
    // NOTE(review): the queue manager is not shut down here; confirm whether
    // QueueManager needs draining or clearing on shutdown.
    await Promise.all([
      this.workerPool.shutdown(),
      this.connectionPool.shutdown(),
      this.streamProcessor.shutdown()
    ]);

    this.emit('shutdownComplete');
  }
}
827
+
828
+ export default PerformanceManager;