crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,828 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PerformanceManager - Orchestrates performance optimization components
|
|
3
|
+
* Integrates WorkerPool, ConnectionPool, StreamProcessor, and QueueManager
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { EventEmitter } from 'events';
|
|
7
|
+
import WorkerPool from './workers/WorkerPool.js';
|
|
8
|
+
import ConnectionPool from './connections/ConnectionPool.js';
|
|
9
|
+
import StreamProcessor from './processing/StreamProcessor.js';
|
|
10
|
+
import QueueManager from './queue/QueueManager.js';
|
|
11
|
+
import { config } from '../constants/config.js';
|
|
12
|
+
|
|
13
|
+
export class PerformanceManager extends EventEmitter {
  /**
   * Build the manager together with its four execution components
   * (worker pool, connection pool, stream processor, queue manager),
   * wire their events through to this emitter and, unless disabled,
   * start periodic metrics collection.
   *
   * @param {Object} [options]
   * @param {Object} [options.workerPoolOptions] - Overrides merged over the WorkerPool defaults
   * @param {Object} [options.connectionPoolOptions] - Overrides merged over the ConnectionPool defaults
   * @param {Object} [options.streamProcessorOptions] - Overrides merged over the StreamProcessor defaults
   * @param {Object} [options.queueManagerOptions] - Overrides merged over the QueueManager defaults
   * @param {boolean} [options.enableMetrics=true] - Whether to collect metrics on a timer
   * @param {number} [options.metricsInterval=10000] - Collection period in milliseconds
   */
  constructor(options = {}) {
    super();

    const {
      workerPoolOptions = {},
      connectionPoolOptions = {},
      streamProcessorOptions = {},
      queueManagerOptions = {},
      enableMetrics = true,
      metricsInterval = 10000
    } = options;

    this.enableMetrics = enableMetrics;
    this.metricsInterval = metricsInterval;

    // Initialize performance components (caller overrides win over defaults)
    this.workerPool = new WorkerPool({
      maxWorkers: config.performance.maxWorkers,
      ...workerPoolOptions
    });

    this.connectionPool = new ConnectionPool({
      maxSockets: config.performance.maxWorkers * 2,
      ...connectionPoolOptions
    });

    this.streamProcessor = new StreamProcessor({
      chunkSize: 1000,
      memoryLimit: 100 * 1024 * 1024, // 100MB
      ...streamProcessorOptions
    });

    this.queueManager = new QueueManager({
      concurrency: config.performance.queueConcurrency,
      ...queueManagerOptions
    });

    // Performance metrics
    this.metrics = this._createEmptyMetrics();

    // Task routing configuration
    this.taskRouting = {
      // CPU-intensive tasks go to worker pool
      parseHtml: 'worker',
      extractContent: 'worker',
      analyzeText: 'worker',
      processStructuredData: 'worker',
      calculateSimilarity: 'worker',

      // I/O tasks go to connection pool
      fetchUrl: 'connection',
      downloadFile: 'connection',
      validateUrls: 'connection',

      // Large data processing goes to stream processor
      processBatch: 'stream',
      processLargeDataset: 'stream',
      transformData: 'stream',

      // Standard tasks go to queue manager
      default: 'queue'
    };

    this.setupEventHandlers();
    this.startMetricsCollection();
  }

  /**
   * Execute a task using the optimal performance component.
   *
   * Counts the operation exactly once in `metrics` and emits
   * `taskCompleted` on success or `taskFailed` on failure.
   *
   * @param {string} taskType - Type of task
   * @param {any} data - Task data
   * @param {Object} [options] - Task options
   * @returns {Promise<any>} - Task result
   * @throws Re-throws whatever the selected component rejected with
   */
  async executeTask(taskType, data, options = {}) {
    const startTime = Date.now();
    this.metrics.totalOperations++;

    let component;
    let result;

    try {
      component = this.getOptimalComponent(taskType, data, options);
      result = await this.executeOnComponent(component, taskType, data, options);
    } catch (error) {
      this.metrics.failedOperations++;

      this.emit('taskFailed', {
        taskType,
        error: this._errorMessage(error),
        duration: Date.now() - startTime
      });

      throw error;
    }

    // Success bookkeeping lives outside the try block so that a task which
    // already succeeded can never also be recorded as failed.
    const duration = Date.now() - startTime;
    this.updateSuccessMetrics(duration);

    this.emit('taskCompleted', {
      taskType,
      component,
      duration,
      dataSize: this.getDataSize(data)
    });

    return result;
  }

  /**
   * Execute multiple tasks with intelligent distribution.
   *
   * An explicit `strategy` of 'parallel', 'sequential' or 'mixed' is always
   * honoured. Any other value (including the default 'auto') uses automatic
   * distribution, grouping similar tasks first when both
   * `enableOptimization` and `groupBySimilarity` are true.
   *
   * @param {Array} tasks - Array of {taskType, data, options} objects
   * @param {Object} [batchOptions] - Batch execution options
   * @param {string} [batchOptions.strategy='auto'] - 'auto', 'parallel', 'sequential' or 'mixed'
   * @param {number} [batchOptions.maxConcurrency] - Chunk size used by the parallel strategy
   * @param {boolean} [batchOptions.enableOptimization=true]
   * @param {boolean} [batchOptions.groupBySimilarity=true]
   * @returns {Promise<Array>} - Array of results
   */
  async executeBatch(tasks, batchOptions = {}) {
    const {
      strategy = 'auto',
      enableOptimization = true,
      groupBySimilarity = true
    } = batchOptions;

    switch (strategy) {
      case 'parallel':
        return this.executeParallelBatch(tasks, batchOptions);
      case 'sequential':
        return this.executeSequentialBatch(tasks, batchOptions);
      case 'mixed':
        return this.executeMixedBatch(tasks, batchOptions);
      default:
        // Grouping is an optimisation of the automatic strategy only; it must
        // not override a strategy the caller asked for explicitly.
        if (enableOptimization && groupBySimilarity) {
          return this.executeBatchGroups(this.groupTasksBySimilarity(tasks), batchOptions);
        }
        return this.executeAutoBatch(tasks, batchOptions);
    }
  }

  /**
   * Process large dataset using stream processing.
   *
   * With `enableWorkerPool` (the default) every item that `isCpuIntensive`
   * reports as heavy is sent to the worker pool as a 'processItem' task
   * instead of being passed to `processor`; lighter items go to `processor`.
   *
   * @param {Array|AsyncIterable} data - Data to process
   * @param {Function} processor - Processing function, called as (item, index)
   * @param {Object} [options] - Processing options, forwarded to the stream processor
   * @param {boolean} [options.enableWorkerPool=true]
   * @param {boolean} [options.enablePagination=true]
   * @param {number} [options.chunkSize=1000]
   * @returns {Promise<Object>} - Processing results
   */
  async processLargeDataset(data, processor, options = {}) {
    const {
      enableWorkerPool = true,
      enablePagination = true,
      chunkSize = 1000
    } = options;

    if (!enableWorkerPool) {
      return this.streamProcessor.processStream(data, processor, options);
    }

    // Enhance processor to use worker pool for CPU-intensive operations
    const enhancedProcessor = async (item, index) => {
      if (this.isCpuIntensive(item)) {
        return this.workerPool.execute('processItem', item, { timeout: 30000 });
      }
      return processor(item, index);
    };

    return this.streamProcessor.processStream(data, enhancedProcessor, {
      ...options,
      enablePagination,
      chunkSize
    });
  }

  /**
   * Get optimal component for task execution.
   *
   * An entry in `taskRouting` wins; otherwise the component is chosen from
   * the estimated data size and the task type.
   *
   * @param {string} taskType - Type of task
   * @param {any} data - Task data
   * @param {Object} options - Task options (currently unused)
   * @returns {string} - Component name: 'worker', 'connection', 'stream' or 'queue'
   */
  getOptimalComponent(taskType, data, options) {
    // Check explicit routing first
    const explicitRoute = this._lookupRoute(taskType);
    if (explicitRoute) {
      return explicitRoute;
    }

    // Auto-select based on task characteristics
    const dataSize = this.getDataSize(data);
    const isLargeDataset = dataSize > 10 * 1024 * 1024; // 10MB
    const isCpuIntensive = this.isCpuIntensive(data);
    const isNetworkOperation = this.isNetworkOperation(taskType);

    // NOTE(review): both flags derive from the same size estimate, so anything
    // over 10MB is also over the 100KB CPU threshold and this first branch
    // cannot currently be taken. Left as-is until the intended rule is confirmed.
    if (isLargeDataset && !isCpuIntensive) {
      return 'stream';
    }
    if (isCpuIntensive) {
      return 'worker';
    }
    if (isNetworkOperation) {
      return 'connection';
    }
    return 'queue';
  }

  /**
   * Execute task on specific component.
   *
   * Does not touch `metrics`; `executeTask` records the outcome.
   *
   * @param {string} component - Component name
   * @param {string} taskType - Task type
   * @param {any} data - Task data
   * @param {Object} [options] - Task options
   * @returns {Promise<any>} - Task result
   */
  async executeOnComponent(component, taskType, data, options = {}) {
    switch (component) {
      case 'worker':
        return this.workerPool.execute(taskType, data, options);

      case 'connection':
        if (this._isHttpRequestTask(taskType)) {
          return this.connectionPool.request(this._toRequestConfig(data, options));
        }
        return this.queueManager.add(() => this.executeNetworkTask(taskType, data, options));

      case 'stream':
        if (this._isStreamable(data)) {
          return this.streamProcessor.processStream(
            data,
            options.processor || (item => item),
            options
          );
        }
        return this._enqueueStandardTask(taskType, data, options);

      case 'queue':
      default:
        return this._enqueueStandardTask(taskType, data, options);
    }
  }

  /**
   * Execute auto-optimized batch.
   *
   * Tasks are grouped by their optimal component and each group is run with
   * that component's batch facility. Results are returned in group order
   * (the order in which each component first appears in `tasks`), not in the
   * original task order.
   *
   * @param {Array} tasks - Tasks to execute
   * @param {Object} options - Batch options (currently unused)
   * @returns {Promise<Array>} - Results
   */
  async executeAutoBatch(tasks, options) {
    const results = [];

    // Execute each group with its optimal component
    for (const [component, componentTasks] of this.groupTasksByComponent(tasks)) {
      let componentResults;

      switch (component) {
        case 'worker':
          componentResults = await this.workerPool.executeBatch(
            componentTasks.map(t => ({ taskType: t.taskType, data: t.data, options: t.options })),
            { maxConcurrent: this.workerPool.maxWorkers }
          );
          break;

        case 'connection':
          componentResults = await this._executeConnectionGroup(componentTasks);
          break;

        case 'stream':
          // For stream tasks, process each one individually
          componentResults = [];
          for (const task of componentTasks) {
            const taskOptions = task.options || {};
            componentResults.push(await this.streamProcessor.processStream(
              task.data,
              taskOptions.processor || (item => item),
              taskOptions
            ));
          }
          break;

        default:
          // The callbacks already run inside the queue, so the generic task
          // must not be enqueued a second time (see _runStandardTask).
          componentResults = await this.queueManager.addAll(
            componentTasks.map(t => () => this._runStandardTask(
              t.taskType,
              t.data,
              t.options || {},
              { insideQueue: true, recordMetrics: true }
            ))
          );
      }

      // Appended one by one: spreading a very large array into push() can
      // exceed the engine's argument limit.
      for (const item of componentResults) {
        results.push(item);
      }
    }

    return results;
  }

  /**
   * Group tasks by optimal component.
   * @param {Array} tasks - Tasks to group
   * @returns {Map} - Component name -> tasks, in first-seen order
   */
  groupTasksByComponent(tasks) {
    const groups = new Map();

    for (const task of tasks) {
      const component = this.getOptimalComponent(task.taskType, task.data, task.options);
      const bucket = groups.get(component);

      if (bucket) {
        bucket.push(task);
      } else {
        groups.set(component, [task]);
      }
    }

    return groups;
  }

  /**
   * Group tasks by similarity for optimization.
   *
   * Two tasks are "similar" when they share both task type and optimal
   * component.
   *
   * @param {Array} tasks - Tasks to group
   * @returns {Array} - Array of task arrays, in first-seen order
   */
  groupTasksBySimilarity(tasks) {
    const taskMap = new Map();

    for (const task of tasks) {
      const key = `${task.taskType}_${this.getOptimalComponent(task.taskType, task.data, task.options)}`;
      const bucket = taskMap.get(key);

      if (bucket) {
        bucket.push(task);
      } else {
        taskMap.set(key, [task]);
      }
    }

    return [...taskMap.values()];
  }

  /**
   * Execute batch groups one after another via `executeAutoBatch`.
   * @param {Array} groups - Task groups
   * @param {Object} options - Batch options
   * @returns {Promise<Array>} - Concatenated results, in group order
   */
  async executeBatchGroups(groups, options) {
    const results = [];

    for (const group of groups) {
      const groupResults = await this.executeAutoBatch(group, options);
      for (const item of groupResults) {
        results.push(item);
      }
    }

    return results;
  }

  /**
   * Execute parallel batch.
   *
   * Tasks run in consecutive chunks of `maxConcurrency`; each chunk must
   * finish before the next one starts. Rejects as soon as any task rejects.
   *
   * @param {Array} tasks - Tasks to execute
   * @param {Object} [options] - Batch options
   * @param {number} [options.maxConcurrency] - Tasks per chunk (defaults to config.performance.maxWorkers)
   * @returns {Promise<Array>} - Results in task order
   * @throws {RangeError} If `maxConcurrency` is not a number >= 1
   */
  async executeParallelBatch(tasks, options = {}) {
    const { maxConcurrency = config.performance.maxWorkers } = options;
    const results = [];

    for (const chunk of this.chunkArray(tasks, maxConcurrency)) {
      const chunkResults = await Promise.all(
        chunk.map(task => this.executeTask(task.taskType, task.data, task.options))
      );
      for (const item of chunkResults) {
        results.push(item);
      }
    }

    return results;
  }

  /**
   * Execute sequential batch: one task at a time, stopping at the first failure.
   * @param {Array} tasks - Tasks to execute
   * @param {Object} options - Batch options (currently unused)
   * @returns {Promise<Array>} - Results in task order
   */
  async executeSequentialBatch(tasks, options) {
    const results = [];

    for (const task of tasks) {
      results.push(await this.executeTask(task.taskType, task.data, task.options));
    }

    return results;
  }

  /**
   * Execute mixed batch (some parallel, some sequential).
   *
   * Parallel-safe tasks run first via `executeParallelBatch`, the remainder
   * afterwards via `executeSequentialBatch`.
   *
   * @param {Array} tasks - Tasks to execute
   * @param {Object} options - Batch options
   * @returns {Promise<Array>} - Results in the original task order
   */
  async executeMixedBatch(tasks, options) {
    // Classify once so that splitting and merging cannot disagree.
    const parallelFlags = tasks.map(task => this.canRunInParallel(task));
    const parallelTasks = tasks.filter((task, i) => parallelFlags[i]);
    const sequentialTasks = tasks.filter((task, i) => !parallelFlags[i]);

    const parallelResults = await this.executeParallelBatch(parallelTasks, options);
    const sequentialResults = await this.executeSequentialBatch(sequentialTasks, options);

    // Merge results maintaining original order
    let parallelIndex = 0;
    let sequentialIndex = 0;

    return parallelFlags.map(isParallel => (
      isParallel ? parallelResults[parallelIndex++] : sequentialResults[sequentialIndex++]
    ));
  }

  /**
   * Check if task can run in parallel.
   * @param {Object} task - Task object
   * @returns {boolean} - Whether task can run in parallel
   */
  canRunInParallel(task) {
    const parallelSafeTasks = ['parseHtml', 'extractContent', 'analyzeText', 'fetchUrl'];
    return parallelSafeTasks.includes(task.taskType);
  }

  /**
   * Execute network task.
   * @param {string} taskType - Task type; only 'validateUrls' is implemented
   * @param {any} data - Task data
   * @param {Object} options - Task options
   * @returns {Promise<any>} - Task result
   * @throws {Error} For any other task type
   */
  async executeNetworkTask(taskType, data, options) {
    switch (taskType) {
      case 'validateUrls':
        return this.validateUrls(data, options);
      default:
        throw new Error(`Unknown network task type: ${taskType}`);
    }
  }

  /**
   * Execute standard task.
   *
   * Dispatches on the task's routing entry, records the outcome in
   * `metrics`, and wraps any failure in an Error whose message starts with
   * "Task execution failed for <taskType>:" (the original error is kept as
   * `cause`).
   *
   * @param {string} taskType - Task type
   * @param {any} data - Task data
   * @param {Object} [options] - Task options
   * @returns {Promise<any>} - Task result
   */
  async executeStandardTask(taskType, data, options = {}) {
    return this._runStandardTask(taskType, data, options, {
      insideQueue: false,
      recordMetrics: true
    });
  }

  /**
   * Shared implementation behind `executeStandardTask`.
   *
   * @private
   * @param {string} taskType - Task type
   * @param {any} data - Task data
   * @param {Object} options - Task options
   * @param {Object} mode
   * @param {boolean} mode.insideQueue - True when the caller is already running
   *   inside a queue-manager job. Generic work is then run inline: enqueuing it
   *   again would leave the outer job holding a queue slot while waiting for an
   *   inner job that may never be granted one.
   * @param {boolean} mode.recordMetrics - False when the caller (executeTask)
   *   records the outcome itself, so each task is counted exactly once.
   * @returns {Promise<any>} - Task result
   */
  async _runStandardTask(taskType, data, options, { insideQueue, recordMetrics }) {
    const startTime = Date.now();

    if (recordMetrics) {
      this.metrics.totalOperations++;
    }

    try {
      // Determine which component should handle this task
      const routingKey = this._lookupRoute(taskType) || this.taskRouting.default;
      let result;

      switch (routingKey) {
        case 'worker':
          // CPU-intensive tasks go to worker pool. Uses the same
          // (taskType, data, options) call shape as every other
          // workerPool.execute call in this class.
          result = await this.workerPool.execute(taskType, data, options);
          break;

        case 'connection':
          // I/O tasks use connection pool
          result = await this._executeIOTask(taskType, data, options);
          break;

        case 'stream':
          // Large data processing tasks
          // NOTE(review): the rest of this class calls processStream();
          // confirm that StreamProcessor also exposes process().
          result = await this.streamProcessor.process(data, {
            taskType,
            ...options
          });
          break;

        case 'queue':
        default:
          // Standard queue processing
          result = insideQueue
            ? await this._executeGenericTask(taskType, data, options)
            : await this.queueManager.add(
              () => this._executeGenericTask(taskType, data, options),
              options
            );
          break;
      }

      if (recordMetrics) {
        this.updateSuccessMetrics(Date.now() - startTime);
      }

      return result;
    } catch (error) {
      if (recordMetrics) {
        this.metrics.failedOperations++;
      }
      throw new Error(
        `Task execution failed for ${taskType}: ${this._errorMessage(error)}`,
        { cause: error }
      );
    }
  }

  /**
   * Run a standard task as a single queue job on behalf of `executeOnComponent`.
   * @private
   */
  _enqueueStandardTask(taskType, data, options) {
    return this.queueManager.add(
      () => this._runStandardTask(taskType, data, options, {
        insideQueue: true,
        recordMetrics: false
      }),
      options
    );
  }

  /**
   * Run one connection-routed group of an auto batch.
   *
   * 'fetchUrl' / 'downloadFile' tasks are sent to the connection pool as a
   * request batch; every other task is run through `executeNetworkTask`, the
   * same split `executeOnComponent` applies to single tasks. Request results
   * come first, followed by the remaining results.
   *
   * @private
   * @param {Array} componentTasks - Tasks routed to the connection component
   * @returns {Promise<Array>} - Results
   */
  async _executeConnectionGroup(componentTasks) {
    const requestTasks = [];
    const otherTasks = [];

    for (const task of componentTasks) {
      (this._isHttpRequestTask(task.taskType) ? requestTasks : otherTasks).push(task);
    }

    const results = [];

    if (requestTasks.length > 0) {
      const responses = await this.connectionPool.requestBatch(
        requestTasks.map(t => this._toRequestConfig(t.data, t.options)),
        // Use ~80% of the sockets, as a whole number of at least one.
        { maxConcurrent: Math.max(1, Math.floor(this.connectionPool.maxSockets * 0.8)) }
      );
      for (const response of responses) {
        results.push(response);
      }
    }

    if (otherTasks.length > 0) {
      const outcomes = await this.queueManager.addAll(
        otherTasks.map(t => () => this.executeNetworkTask(t.taskType, t.data, t.options))
      );
      for (const outcome of outcomes) {
        results.push(outcome);
      }
    }

    return results;
  }

  /**
   * Whether a task type is issued as a connection-pool request.
   * @private
   */
  _isHttpRequestTask(taskType) {
    return taskType === 'fetchUrl' || taskType === 'downloadFile';
  }

  /**
   * Build the request descriptor handed to the connection pool. `data` is
   * either a URL or an object with `url` and optional `method` / `headers`;
   * task options are spread last and therefore override those fields.
   * @private
   */
  _toRequestConfig(data, options) {
    return {
      url: data.url || data,
      method: data.method || 'GET',
      headers: data.headers || {},
      ...options
    };
  }

  /**
   * Whether `data` can be handed to `streamProcessor.processStream`
   * (an array or an async iterable).
   * @private
   */
  _isStreamable(data) {
    return Array.isArray(data) || (data != null && Boolean(data[Symbol.asyncIterator]));
  }

  /**
   * Look up the routing entry for a task type, considering own properties of
   * the table only so that names such as 'toString' or 'constructor' do not
   * resolve to inherited Object.prototype members.
   * @private
   * @returns {string|undefined}
   */
  _lookupRoute(taskType) {
    return Object.prototype.hasOwnProperty.call(this.taskRouting, taskType)
      ? this.taskRouting[taskType]
      : undefined;
  }

  /**
   * Best-effort message for any thrown value, including non-Error values.
   * @private
   */
  _errorMessage(error) {
    return error != null && error.message !== undefined ? error.message : String(error);
  }

  /**
   * Fresh metrics record with all counters at zero and `startTime` set to now.
   * @private
   */
  _createEmptyMetrics() {
    return {
      startTime: Date.now(),
      totalOperations: 0,
      completedOperations: 0,
      failedOperations: 0,
      avgOperationTime: 0,
      memoryUsage: {
        current: 0,
        peak: 0
      },
      componentStats: {}
    };
  }

  /**
   * Execute I/O intensive task using connection pool.
   *
   * Resolves with the request's 'response' event payload and rejects with
   * its 'error' event payload.
   *
   * NOTE(review): the rest of this class talks to the pool through
   * request()/requestBatch(); confirm that ConnectionPool also exposes
   * createRequest() returning an emitter with end().
   *
   * @private
   */
  async _executeIOTask(taskType, data, options) {
    return new Promise((resolve, reject) => {
      const request = this.connectionPool.createRequest({
        taskType,
        data,
        options
      });

      request.on('response', resolve);
      request.on('error', reject);

      request.end();
    });
  }

  /**
   * Execute generic task.
   *
   * - 'validateUrl': parses `data` with `new URL` and returns
   *   `{ valid: true, url }`; an unparsable URL throws.
   * - 'normalizeData': drops falsy entries from arrays, returns anything else unchanged.
   * - 'calculateMetrics': returns the JSON length of `data` and a timestamp,
   *   with `options` spread over them.
   * - anything else: echoes the task back with `processed: true`.
   *
   * @private
   */
  async _executeGenericTask(taskType, data, options) {
    // Basic task execution for common operations
    switch (taskType) {
      case 'validateUrl': {
        const url = new URL(data);
        return { valid: true, url: url.href };
      }

      case 'normalizeData':
        return Array.isArray(data) ? data.filter(Boolean) : data;

      case 'calculateMetrics': {
        // JSON.stringify yields undefined (not a string) for undefined,
        // functions and symbols; report those as size 0 instead of throwing.
        const serialized = JSON.stringify(data);
        return {
          size: serialized === undefined ? 0 : serialized.length,
          timestamp: Date.now(),
          ...options
        };
      }

      default:
        // For unknown task types, return the data as-is with metadata
        return {
          taskType,
          data,
          processed: true,
          timestamp: Date.now()
        };
    }
  }

  /**
   * Validate URLs (syntax only, via the WHATWG URL parser; no network access).
   * @param {Array|string} urls - URLs to validate; a single string is treated as one URL
   * @param {Object} options - Validation options (currently unused)
   * @returns {Promise<Object>} - `{ valid: string[], invalid: Array<{url, error}> }`
   */
  async validateUrls(urls, options) {
    const results = { valid: [], invalid: [] };
    // A bare string would otherwise be iterated character by character.
    const candidates = typeof urls === 'string' ? [urls] : urls;

    for (const url of candidates) {
      try {
        new URL(url);
        results.valid.push(url);
      } catch (error) {
        results.invalid.push({ url, error: error.message });
      }
    }

    return results;
  }

  /**
   * Get data size estimate. Never throws.
   *
   * Strings: UTF-8 byte length. Arrays: 100 bytes per element (rough
   * estimate). Buffers, typed arrays and ArrayBuffers: their byteLength.
   * Other objects: UTF-8 byte length of their JSON form, or 0 when they
   * cannot be serialised (for example circular structures). Everything
   * else: 0.
   *
   * @param {any} data - Data to measure
   * @returns {number} - Size in bytes
   */
  getDataSize(data) {
    if (typeof data === 'string') {
      return Buffer.byteLength(data, 'utf8');
    }
    if (Array.isArray(data)) {
      return data.length * 100; // Rough estimate
    }
    if (ArrayBuffer.isView(data) || data instanceof ArrayBuffer) {
      return data.byteLength;
    }
    if (typeof data === 'object' && data !== null) {
      try {
        const serialized = JSON.stringify(data);
        return typeof serialized === 'string' ? Buffer.byteLength(serialized, 'utf8') : 0;
      } catch (error) {
        // Deliberately ignored: this is only an estimate used for routing and
        // event payloads, and must not fail the task it describes.
        return 0;
      }
    }
    return 0;
  }

  /**
   * Check if task is CPU intensive, judged purely by estimated data size.
   * @param {any} data - Task data
   * @returns {boolean} - Whether the estimate exceeds 100KB
   */
  isCpuIntensive(data) {
    return this.getDataSize(data) > 100 * 1024; // 100KB threshold
  }

  /**
   * Check if task is network operation.
   * @param {string} taskType - Task type
   * @returns {boolean} - Whether task is network operation
   */
  isNetworkOperation(taskType) {
    const networkTasks = ['fetchUrl', 'downloadFile', 'validateUrls', 'checkConnectivity'];
    return networkTasks.includes(taskType);
  }

  /**
   * Chunk array into smaller arrays.
   * @param {Array} array - Array to chunk
   * @param {number} chunkSize - Size of each chunk; fractional values are rounded down
   * @returns {Array} - Array of chunks
   * @throws {RangeError} If `chunkSize` is not a number >= 1 (a zero, negative
   *   or NaN step would otherwise loop forever or drop elements)
   */
  chunkArray(array, chunkSize) {
    const size = Math.floor(chunkSize);
    if (!(size >= 1)) {
      throw new RangeError(`chunkSize must be a number >= 1, received: ${chunkSize}`);
    }

    const chunks = [];
    for (let start = 0; start < array.length; start += size) {
      chunks.push(array.slice(start, start + size));
    }
    return chunks;
  }

  /**
   * Update success metrics: bump the completed counter and fold `duration`
   * into the running average.
   * @param {number} duration - Operation duration in milliseconds
   */
  updateSuccessMetrics(duration) {
    this.metrics.completedOperations++;

    const total = this.metrics.completedOperations;
    this.metrics.avgOperationTime = (
      (this.metrics.avgOperationTime * (total - 1) + duration) / total
    );
  }

  /**
   * Setup event handlers for all components: component events are re-emitted
   * on this manager under component-prefixed names.
   */
  setupEventHandlers() {
    const forward = (source, sourceEvent, targetEvent) => {
      source.on(sourceEvent, (payload) => {
        this.emit(targetEvent, payload);
      });
    };

    // Worker pool events
    forward(this.workerPool, 'taskCompleted', 'workerTaskCompleted');
    forward(this.workerPool, 'taskFailed', 'workerTaskFailed');

    // Connection pool events
    forward(this.connectionPool, 'requestCompleted', 'connectionRequestCompleted');
    forward(this.connectionPool, 'requestFailed', 'connectionRequestFailed');
    forward(this.connectionPool, 'backpressureActivated', 'connectionBackpressure');

    // Stream processor events
    forward(this.streamProcessor, 'itemProcessed', 'streamItemProcessed');
    forward(this.streamProcessor, 'memoryPressure', 'streamMemoryPressure');

    // Queue manager events (completion is forwarded without a payload)
    this.queueManager.queue.on('completed', () => {
      this.emit('queueTaskCompleted');
    });

    this.queueManager.queue.on('error', (error) => {
      this.emit('queueTaskFailed', { error: error.message });
    });
  }

  /**
   * Start metrics collection every `metricsInterval` milliseconds.
   * No-op when metrics are disabled; restarting replaces the running timer.
   */
  startMetricsCollection() {
    if (!this.enableMetrics) return;

    // Avoid leaking an earlier interval if this is called more than once.
    if (this.metricsTimer) {
      clearInterval(this.metricsTimer);
    }

    this.metricsTimer = setInterval(() => {
      this.collectMetrics();
    }, this.metricsInterval);

    // Background bookkeeping must not be the only thing keeping the process alive.
    if (typeof this.metricsTimer.unref === 'function') {
      this.metricsTimer.unref();
    }
  }

  /**
   * Collect metrics from all components, refresh heap usage (current and
   * peak) and emit `metricsCollected`.
   */
  collectMetrics() {
    const { heapUsed } = process.memoryUsage();

    this.metrics.memoryUsage = {
      current: heapUsed,
      peak: Math.max(this.metrics.memoryUsage.peak || 0, heapUsed)
    };

    this.metrics.componentStats = {
      workerPool: this.workerPool.getStats(),
      connectionPool: this.connectionPool.getStats(),
      streamProcessor: this.streamProcessor.getStats(),
      queueManager: this.queueManager.getStats()
    };

    this.emit('metricsCollected', this.metrics);
  }

  /**
   * Get comprehensive performance metrics.
   * @returns {Object} - Current metrics plus `uptime` (ms) and
   *   `operationsPerSecond` (0 while uptime is still 0 ms)
   */
  getMetrics() {
    this.collectMetrics(); // Get latest metrics

    const uptime = Date.now() - this.metrics.startTime;

    return {
      ...this.metrics,
      uptime,
      // Guard the division: a zero uptime would yield NaN or Infinity.
      operationsPerSecond: uptime > 0 ? this.metrics.completedOperations / (uptime / 1000) : 0
    };
  }

  /**
   * Reset all performance metrics and restart the uptime clock.
   */
  resetMetrics() {
    this.metrics = this._createEmptyMetrics();
  }

  /**
   * Graceful shutdown of all components.
   *
   * Emits `shutdown`, stops metrics collection, then shuts down the worker
   * pool, connection pool and stream processor. Every component is given the
   * chance to shut down even if another one fails; the first failure is
   * re-thrown afterwards and `shutdownComplete` is emitted only when all
   * succeeded.
   *
   * NOTE(review): the queue manager is not drained or cleared here; confirm
   * whether it needs explicit teardown.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    this.emit('shutdown');

    // Stop metrics collection
    if (this.metricsTimer) {
      clearInterval(this.metricsTimer);
      this.metricsTimer = null;
    }

    // Shutdown all components
    const failures = [];
    await Promise.all(
      [this.workerPool, this.connectionPool, this.streamProcessor].map(async (component) => {
        try {
          await component.shutdown();
        } catch (error) {
          failures.push(error);
        }
      })
    );

    if (failures.length > 0) {
      throw failures[0];
    }

    this.emit('shutdownComplete');
  }
}
|
|
827
|
+
|
|
828
|
+
export default PerformanceManager;
|