crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,932 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wave 3 Performance Optimization Utilities
|
|
3
|
+
*
|
|
4
|
+
* This module provides performance optimization utilities specifically designed
|
|
5
|
+
* for Wave 3 features: Deep Research, Stealth Mode, Localization, and Change Tracking.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Worker, isMainThread, parentPort, workerData } from 'worker_threads';
import { EventEmitter } from 'events';
import { performance } from 'perf_hooks';
import https from 'https';
import http from 'http';
import fs from 'fs/promises';
import os from 'os';
|
|
14
|
+
|
|
15
|
+
/**
 * Memory Leak Monitor
 *
 * While monitoring is enabled, samples `process.memoryUsage()` every
 * `checkInterval` milliseconds, keeps the 20 most recent samples and emits
 * events when heap usage crosses the configured threshold or when the
 * normalized heap growth trend exceeds `leakThreshold`. It also keeps a
 * registry of resources together with the callbacks that clean them up.
 *
 * Events emitted: `monitoring_started`, `monitoring_stopped`,
 * `threshold_exceeded`, `gc_forced`, `leak_detected`, `leak_resolved`,
 * `memory_status`, `resource_released`, `cleanup_error`.
 */
class MemoryLeakMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.checkInterval=30000] Milliseconds between checks.
   * @param {number} [options.memoryThreshold=536870912] Heap-used limit in bytes.
   * @param {number} [options.leakThreshold=0.15] Normalized growth per sample
   *   above which a leak is reported.
   * @param {number} [options.gcForceThreshold=0.8] Fraction of
   *   `memoryThreshold` above which a garbage collection is forced (only when
   *   `global.gc` is exposed).
   */
  constructor(options = {}) {
    super();

    this.options = {
      checkInterval: 30000, // 30 seconds
      memoryThreshold: 512 * 1024 * 1024, // 512MB
      leakThreshold: 0.15, // 15% growth rate
      gcForceThreshold: 0.8, // Force GC at 80% of threshold
      ...options
    };

    this.measurements = [];
    this.resourceTracking = new Map();
    this.leakDetected = false;
    this.monitoring = false;
    this.forceGCAvailable = false;
    // Handle of the periodic check; only exists while monitoring is enabled.
    this.checkTimer = null;

    this.setupMonitoring();
  }

  /**
   * Detects whether forced garbage collection is available.
   *
   * The periodic timer is owned by startMonitoring()/stopMonitoring() so that
   * an idle monitor never holds a timer that cannot be cleared.
   */
  setupMonitoring() {
    this.forceGCAvailable = typeof global.gc === 'function';
  }

  /** Enables periodic memory checks. Safe to call more than once. */
  startMonitoring() {
    this.monitoring = true;

    if (!this.checkTimer) {
      this.checkTimer = setInterval(() => {
        this.performMemoryCheck();
      }, this.options.checkInterval);
    }

    this.emit('monitoring_started');
  }

  /** Disables periodic memory checks and releases the timer. */
  stopMonitoring() {
    this.monitoring = false;

    if (this.checkTimer) {
      clearInterval(this.checkTimer);
      this.checkTimer = null;
    }

    this.emit('monitoring_stopped');
  }

  /**
   * Takes one memory sample, evaluates thresholds and leak trend, and emits
   * a `memory_status` event with the result.
   */
  performMemoryCheck() {
    const usage = process.memoryUsage();
    const timestamp = Date.now();
    const { memoryThreshold, gcForceThreshold } = this.options;

    this.measurements.push({
      timestamp,
      rss: usage.rss,
      heapUsed: usage.heapUsed,
      heapTotal: usage.heapTotal,
      external: usage.external
    });

    // Keep only last 20 measurements for trend analysis
    if (this.measurements.length > 20) {
      this.measurements.shift();
    }

    // Check for memory threshold breach
    if (usage.heapUsed > memoryThreshold) {
      this.emit('threshold_exceeded', { usage, threshold: memoryThreshold });
    }

    // The GC trigger is evaluated on its own: it is a fraction of the hard
    // threshold, so nesting it under the breach check would make it a no-op.
    if (this.forceGCAvailable && usage.heapUsed > memoryThreshold * gcForceThreshold) {
      this.forceGarbageCollection();
    }

    // Detect memory leaks
    this.detectMemoryLeak();

    // Emit current status
    this.emit('memory_status', {
      current: usage,
      trend: this.calculateMemoryTrend(),
      leakDetected: this.leakDetected
    });
  }

  /**
   * Updates `leakDetected` from the current trend and emits `leak_detected`
   * or `leak_resolved` on a state change. Needs at least 10 samples.
   */
  detectMemoryLeak() {
    if (this.measurements.length < 10) return;

    const trend = this.calculateMemoryTrend();
    const wasLeaking = this.leakDetected;

    this.leakDetected = trend > this.options.leakThreshold;

    if (this.leakDetected && !wasLeaking) {
      this.emit('leak_detected', {
        trend,
        threshold: this.options.leakThreshold,
        measurements: this.measurements
      });
    } else if (!this.leakDetected && wasLeaking) {
      this.emit('leak_resolved', { trend });
    }
  }

  /**
   * Least-squares slope of `heapUsed` over the sample index, divided by the
   * mean `heapUsed`.
   *
   * @returns {number} Normalized growth per sample; 0 with fewer than two
   *   samples or when the mean is 0.
   */
  calculateMemoryTrend() {
    if (this.measurements.length < 2) return 0;

    const values = this.measurements.map((m) => m.heapUsed);
    const n = values.length;

    // Simple linear regression for trend (x = 0..n-1, closed-form sums)
    const sumX = n * (n - 1) / 2;
    const sumY = values.reduce((a, b) => a + b, 0);
    const sumXY = values.reduce((sum, y, x) => sum + x * y, 0);
    const sumXX = n * (n - 1) * (2 * n - 1) / 6;

    const mean = sumY / n;
    if (mean === 0) return 0; // avoid NaN/Infinity on an all-zero series

    const slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
    return slope / mean; // Normalize by average
  }

  /**
   * Runs `global.gc()` when it is exposed and emits `gc_forced` with the
   * memory usage before and after.
   */
  forceGarbageCollection() {
    if (!this.forceGCAvailable || typeof global.gc !== 'function') return;

    const beforeGC = process.memoryUsage();
    global.gc();
    const afterGC = process.memoryUsage();

    const recovered = beforeGC.heapUsed - afterGC.heapUsed;
    this.emit('gc_forced', { beforeGC, afterGC, recovered });
  }

  /**
   * Registers a resource and the function that cleans it up.
   *
   * @param {*} resourceId Key used to look the resource up later.
   * @param {*} resource The tracked resource.
   * @param {Function} cleanupFn Called by releaseResource().
   */
  trackResource(resourceId, resource, cleanupFn) {
    const now = Date.now();
    this.resourceTracking.set(resourceId, {
      resource,
      cleanupFn,
      createdAt: now,
      lastAccessed: now
    });
  }

  /**
   * Runs the cleanup callback of a tracked resource and forgets it.
   * A cleanup failure is reported as `cleanup_error`; a synchronous failure
   * leaves the resource tracked.
   *
   * @param {*} resourceId Key passed to trackResource().
   */
  releaseResource(resourceId) {
    const tracked = this.resourceTracking.get(resourceId);
    if (!tracked) return;

    try {
      const outcome = tracked.cleanupFn();

      // An async cleanup that rejects would otherwise surface as an
      // unhandled rejection.
      if (outcome && typeof outcome.then === 'function') {
        outcome.then(undefined, (error) => {
          this.emit('cleanup_error', { resourceId, error: error?.message ?? String(error) });
        });
      }

      this.resourceTracking.delete(resourceId);
      this.emit('resource_released', { resourceId });
    } catch (error) {
      this.emit('cleanup_error', { resourceId, error: error.message });
    }
  }

  /** @returns {object} Snapshot of monitor state and current memory usage. */
  getStats() {
    return {
      monitoring: this.monitoring,
      leakDetected: this.leakDetected,
      trackedResources: this.resourceTracking.size,
      measurements: this.measurements.length,
      currentMemory: process.memoryUsage(),
      memoryTrend: this.calculateMemoryTrend()
    };
  }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Worker Pool Manager
 *
 * Manages a pool of worker threads for CPU-intensive tasks. Every worker runs
 * this same module file with `workerData.isWorker` set; tasks are queued and
 * handed to the first available worker.
 *
 * Events emitted: `initialized`, `worker_error`, `worker_exit`, `shutdown`.
 */
class WorkerPoolManager extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.poolSize] Number of workers; defaults to the CPU
   *   count, clamped to the range 1..8.
   * @param {number} [options.maxQueueSize=100] Maximum number of waiting tasks.
   * @param {number} [options.workerTimeout=30000] Stored in `options`; not
   *   read by this class.
   * @param {number} [options.taskTimeout=60000] Milliseconds before a task is
   *   rejected with a timeout error.
   */
  constructor(options = {}) {
    super();

    this.options = {
      // `require` is not defined in an ES module; `os` comes from the import block.
      poolSize: Math.max(1, Math.min(os.cpus().length, 8)),
      maxQueueSize: 100,
      workerTimeout: 30000,
      taskTimeout: 60000,
      ...options
    };

    this.workers = [];
    this.availableWorkers = [];
    this.busyWorkers = new Set();
    this.taskQueue = [];
    this.shuttingDown = false;
    // Number of durations folded into totalTaskTime (successes and errors).
    this.measuredTasks = 0;
    this.stats = {
      tasksCompleted: 0,
      tasksErrors: 0,
      averageTaskTime: 0,
      totalTaskTime: 0
    };

    // initialize() is async; report a spawn failure instead of leaving an
    // unhandled rejection behind.
    this.initialize().catch((error) => {
      this.emit('worker_error', { workerId: undefined, error });
    });
  }

  /** Spawns `poolSize` workers and emits `initialized`. */
  async initialize() {
    for (let i = 0; i < this.options.poolSize; i++) {
      await this.createWorker(i);
    }
    this.emit('initialized', { poolSize: this.options.poolSize });
  }

  /**
   * Spawns one worker running this module and registers it as available.
   *
   * @param {number} workerId Identifier stored on the worker record.
   * @returns {Promise<object>} The worker bookkeeping record.
   */
  async createWorker(workerId) {
    // `__filename` does not exist in an ES module; Worker accepts a file: URL.
    const worker = new Worker(new URL(import.meta.url), {
      workerData: { isWorker: true, workerId }
    });

    worker.on('message', (result) => {
      this.handleWorkerMessage(worker, result);
    });
    worker.on('error', (error) => {
      this.handleWorkerError(worker, error);
    });
    worker.on('exit', (code) => {
      this.handleWorkerExit(worker, code);
    });

    const workerInfo = {
      worker,
      id: workerId,
      busy: false,
      currentTask: null,
      tasksCompleted: 0,
      createdAt: Date.now()
    };

    this.workers.push(workerInfo);
    this.availableWorkers.push(workerInfo);
    return workerInfo;
  }

  /**
   * Queues a task for execution on a worker.
   *
   * @param {string} taskType Task type understood by the worker.
   * @param {*} taskData Payload posted to the worker.
   * @param {object} [options] Extra options posted to the worker.
   * @returns {Promise<*>} Resolves with the worker's result; rejects on worker
   *   error, timeout, a full queue, or when the pool is shutting down.
   */
  executeTask(taskType, taskData, options = {}) {
    return new Promise((resolve, reject) => {
      if (this.shuttingDown) {
        reject(new Error('Worker pool is shutting down'));
        return;
      }

      // Check capacity before arming the timer so a refused task leaves no
      // pending timeout behind.
      if (this.taskQueue.length >= this.options.maxQueueSize) {
        reject(new Error('Task queue is full'));
        return;
      }

      const task = {
        id: this.generateTaskId(),
        type: taskType,
        data: taskData,
        options,
        resolve,
        reject,
        createdAt: Date.now(),
        timeout: null
      };
      task.timeout = setTimeout(() => {
        this.handleTaskTimeout(task);
      }, this.options.taskTimeout);

      this.taskQueue.push(task);
      this.processQueue();
    });
  }

  /** Hands queued tasks to available workers until one of the two runs out. */
  processQueue() {
    while (this.taskQueue.length > 0 && this.availableWorkers.length > 0) {
      const task = this.taskQueue.shift();
      const workerInfo = this.availableWorkers.shift();

      this.assignTaskToWorker(task, workerInfo);
    }
  }

  /**
   * Marks a worker busy and posts the task to it.
   *
   * @param {object} task Task record created by executeTask().
   * @param {object} workerInfo Worker record created by createWorker().
   */
  assignTaskToWorker(task, workerInfo) {
    workerInfo.busy = true;
    workerInfo.currentTask = task;
    this.busyWorkers.add(workerInfo);

    task.startTime = performance.now();
    task.workerInfo = workerInfo;

    workerInfo.worker.postMessage({
      taskId: task.id,
      type: task.type,
      data: task.data,
      options: task.options
    });
  }

  /**
   * Rejects a task whose timeout elapsed. A task still waiting is removed
   * from the queue; a task already running has its worker terminated, and
   * the worker's `exit` event then triggers the replacement.
   *
   * @param {object} task Task record created by executeTask().
   */
  handleTaskTimeout(task) {
    const timeoutError = new Error('Task timeout after ' + this.options.taskTimeout + 'ms');

    const queuedIndex = this.taskQueue.indexOf(task);
    if (queuedIndex > -1) {
      this.taskQueue.splice(queuedIndex, 1);
      this.stats.tasksErrors++;
      task.reject(timeoutError);
      return;
    }

    const workerInfo = task.workerInfo;
    if (workerInfo && workerInfo.currentTask === task) {
      this.failCurrentTask(workerInfo, timeoutError);
      workerInfo.worker.terminate().catch((error) => {
        this.emit('worker_error', { workerId: workerInfo.id, error });
      });
      return;
    }

    task.reject(timeoutError);
  }

  /**
   * Rejects the task a worker is running, if any, and clears the worker's
   * busy state without returning it to the available list.
   *
   * @param {object} workerInfo Worker record.
   * @param {Error} error Rejection reason.
   */
  failCurrentTask(workerInfo, error) {
    const task = workerInfo.currentTask;
    if (!task) return;

    clearTimeout(task.timeout);
    workerInfo.currentTask = null;
    workerInfo.busy = false;
    this.busyWorkers.delete(workerInfo);
    this.stats.tasksErrors++;
    task.reject(error);
  }

  /**
   * Settles the worker's current task from its reply and schedules more work.
   *
   * @param {Worker} worker Worker that posted the message.
   * @param {object} message Reply with `result` or `error`.
   */
  handleWorkerMessage(worker, message) {
    const workerInfo = this.workers.find((w) => w.worker === worker);
    if (!workerInfo || !workerInfo.currentTask) return;

    const task = workerInfo.currentTask;

    // Ignore a reply that names a different task than the one in flight.
    if (message?.taskId !== undefined && message.taskId !== task.id) return;

    const duration = performance.now() - task.startTime;

    // Clear timeout
    clearTimeout(task.timeout);

    // Update statistics
    this.updateStats(duration);

    // Free up worker
    this.releaseWorker(workerInfo);

    if (message.error) {
      task.reject(new Error(message.error));
      this.stats.tasksErrors++;
    } else {
      task.resolve(message.result);
      this.stats.tasksCompleted++;
    }

    // Process next task in queue
    this.processQueue();
  }

  /**
   * Fails the crashed worker's task, reports the error and replaces the worker.
   *
   * @param {Worker} worker Worker that raised the error.
   * @param {Error} error The uncaught error.
   */
  handleWorkerError(worker, error) {
    const workerInfo = this.workers.find((w) => w.worker === worker);
    if (workerInfo) {
      this.failCurrentTask(workerInfo, error);
    }

    this.emit('worker_error', { workerId: workerInfo?.id, error });

    // Replace failed worker
    this.replaceWorker(workerInfo);
  }

  /**
   * Reports a worker exit. A non-zero exit code leads to a replacement; a
   * clean exit only removes the worker from the pool.
   *
   * @param {Worker} worker Worker that exited.
   * @param {number} code Exit code.
   */
  handleWorkerExit(worker, code) {
    const workerInfo = this.workers.find((w) => w.worker === worker);
    this.emit('worker_exit', { workerId: workerInfo?.id, code });

    // After an `error` event the worker is already gone from the pool.
    if (!workerInfo) return;

    this.failCurrentTask(workerInfo, new Error('Worker exited with code ' + code));

    if (code !== 0) {
      this.replaceWorker(workerInfo);
    } else {
      this.removeWorker(workerInfo);
    }
  }

  /**
   * Drops a worker record from every pool collection.
   *
   * @param {object} workerInfo Worker record.
   * @returns {boolean} True when the worker was still part of the pool.
   */
  removeWorker(workerInfo) {
    const workerIndex = this.workers.indexOf(workerInfo);
    if (workerIndex > -1) {
      this.workers.splice(workerIndex, 1);
    }

    const availableIndex = this.availableWorkers.indexOf(workerInfo);
    if (availableIndex > -1) {
      this.availableWorkers.splice(availableIndex, 1);
    }

    this.busyWorkers.delete(workerInfo);
    return workerIndex > -1;
  }

  /**
   * Removes a failed worker and spawns a replacement with the same id.
   * Does nothing for an unknown or already replaced worker, and never
   * respawns while the pool is shutting down.
   *
   * @param {object|undefined} failedWorker Worker record to replace.
   */
  async replaceWorker(failedWorker) {
    if (!failedWorker) return;

    const wasPooled = this.removeWorker(failedWorker);
    if (!wasPooled || this.shuttingDown) return;

    try {
      // Create replacement worker
      await this.createWorker(failedWorker.id);
      // Tasks may have queued up while no worker was available.
      this.processQueue();
    } catch (error) {
      this.emit('worker_error', { workerId: failedWorker.id, error });
    }
  }

  /**
   * Returns a worker to the available list after it finished a task.
   *
   * @param {object} workerInfo Worker record.
   */
  releaseWorker(workerInfo) {
    workerInfo.busy = false;
    workerInfo.currentTask = null;
    workerInfo.tasksCompleted++;

    this.busyWorkers.delete(workerInfo);
    this.availableWorkers.push(workerInfo);
  }

  /**
   * Folds one task duration into the running average.
   *
   * @param {number} duration Task duration in milliseconds.
   */
  updateStats(duration) {
    this.measuredTasks++;
    this.stats.totalTaskTime += duration;
    // Divide by every measured task; counting successes only would inflate
    // the average whenever a task failed.
    this.stats.averageTaskTime = this.stats.totalTaskTime / this.measuredTasks;
  }

  /** @returns {string} Identifier of the form `task_<ms>_<random>`. */
  generateTaskId() {
    return 'task_' + Date.now() + '_' + Math.random().toString(36).slice(2, 11);
  }

  /** @returns {object} Task counters plus current pool and queue sizes. */
  getStats() {
    return {
      ...this.stats,
      activeWorkers: this.workers.length,
      availableWorkers: this.availableWorkers.length,
      busyWorkers: this.busyWorkers.size,
      queuedTasks: this.taskQueue.length,
      uptime: Date.now() - (this.workers[0]?.createdAt || Date.now())
    };
  }

  /**
   * Stops accepting tasks, waits for busy workers to finish, terminates all
   * workers and rejects whatever is still queued.
   */
  async shutdown() {
    // Set first so that worker exits caused by terminate() do not respawn.
    this.shuttingDown = true;

    // Wait for current tasks to complete
    while (this.busyWorkers.size > 0) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }

    // Terminate all workers
    await Promise.all(this.workers.map((workerInfo) => workerInfo.worker.terminate()));

    // Nothing is left to run these; settle them instead of letting them hang.
    for (const task of this.taskQueue.splice(0)) {
      clearTimeout(task.timeout);
      task.reject(new Error('Worker pool is shutting down'));
    }

    this.emit('shutdown');
  }
}
|
|
414
|
+
|
|
415
|
+
/**
 * Advanced Connection Pool Manager
 *
 * Issues HTTP/HTTPS GET requests through shared keep-alive agents. Requests
 * are queued and dispatched in batches: either when `batchSize` requests are
 * waiting or on the next `batchTimeout` tick. Failed attempts are retried
 * with exponential backoff.
 *
 * Events emitted: `shutdown`.
 */
class ConnectionPoolManager extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.maxSockets=50]
   * @param {number} [options.maxFreeSockets=10]
   * @param {number} [options.timeout=60000] Agent and per-request timeout (ms).
   * @param {number} [options.freeSocketTimeout=30000] Passed to the agents
   *   as-is. NOTE(review): not a documented `http.Agent` option; confirm it
   *   has any effect.
   * @param {boolean} [options.keepAlive=true]
   * @param {number} [options.batchSize=10] Requests dispatched per batch.
   * @param {number} [options.batchTimeout=100] Milliseconds between batch ticks.
   * @param {number} [options.retryAttempts=3] Total attempts per request.
   * @param {number} [options.retryDelay=1000] Base backoff delay (ms).
   */
  constructor(options = {}) {
    super();

    this.options = {
      maxSockets: 50,
      maxFreeSockets: 10,
      timeout: 60000,
      freeSocketTimeout: 30000,
      keepAlive: true,
      batchSize: 10,
      batchTimeout: 100,
      retryAttempts: 3,
      retryDelay: 1000,
      ...options
    };

    const agentOptions = {
      keepAlive: this.options.keepAlive,
      maxSockets: this.options.maxSockets,
      maxFreeSockets: this.options.maxFreeSockets,
      timeout: this.options.timeout,
      freeSocketTimeout: this.options.freeSocketTimeout
    };

    this.httpAgent = new http.Agent({ ...agentOptions });
    this.httpsAgent = new https.Agent({ ...agentOptions });

    this.requestQueue = [];
    this.batchProcessor = null;
    this.closed = false;
    this.stats = {
      requests: 0,
      responses: 0,
      errors: 0,
      batches: 0,
      averageResponseTime: 0,
      totalResponseTime: 0
    };

    this.setupBatchProcessor();
  }

  /** Starts (or restarts) the periodic batch tick. */
  setupBatchProcessor() {
    if (this.batchProcessor) {
      clearInterval(this.batchProcessor);
    }

    this.batchProcessor = setInterval(() => {
      this.processBatch();
    }, this.options.batchTimeout);
  }

  /**
   * Queues a GET request.
   *
   * @param {string|URL} url Absolute http: or https: URL.
   * @param {object} [options] Options forwarded to `http.get`/`https.get`;
   *   `agent` is always replaced by the pool's agent.
   * @returns {Promise<{statusCode: number, headers: object, body: string}>}
   *   Rejects with the last error once all attempts failed, or immediately
   *   when the pool has been shut down.
   */
  request(url, options = {}) {
    return new Promise((resolve, reject) => {
      if (this.closed) {
        reject(new Error('Connection pool has been shut down'));
        return;
      }

      const href = typeof url === 'string' ? url : String(url);

      this.requestQueue.push({
        url: href,
        options: {
          ...options,
          agent: href.startsWith('https:') ? this.httpsAgent : this.httpAgent
        },
        resolve,
        reject,
        createdAt: Date.now(),
        attempts: 0
      });

      // Process immediately if batch is full
      if (this.requestQueue.length >= this.options.batchSize) {
        this.processBatch();
      }
    });
  }

  /** Dispatches up to `batchSize` queued requests concurrently. */
  async processBatch() {
    if (this.requestQueue.length === 0) return;

    const batch = this.requestQueue.splice(0, this.options.batchSize);
    this.stats.batches++;

    await Promise.allSettled(batch.map((requestInfo) => this.executeRequest(requestInfo)));
  }

  /**
   * Performs one attempt of a queued request and settles or reschedules it.
   *
   * @param {object} requestInfo Queue record created by request().
   */
  async executeRequest(requestInfo) {
    const startTime = performance.now();

    try {
      requestInfo.attempts++;

      const response = await this.performRequest(requestInfo.url, requestInfo.options);

      this.updateStats(performance.now() - startTime, false);
      requestInfo.resolve(response);
    } catch (error) {
      if (requestInfo.attempts < this.options.retryAttempts) {
        // Retry with exponential backoff
        const delay = this.options.retryDelay * Math.pow(2, requestInfo.attempts - 1);

        setTimeout(() => {
          this.executeRequest(requestInfo);
        }, delay);
      } else {
        this.stats.errors++;
        // Count the failed request too; otherwise successRate can never
        // drop below 100%.
        this.updateStats(performance.now() - startTime, true);
        requestInfo.reject(error);
      }
    }
  }

  /**
   * Issues a single GET and collects the response body as text.
   *
   * @param {string} url Absolute URL.
   * @param {object} options Options for `http.get`/`https.get`.
   * @returns {Promise<{statusCode: number, headers: object, body: string}>}
   */
  performRequest(url, options) {
    return new Promise((resolve, reject) => {
      const protocol = url.startsWith('https:') ? https : http;

      const request = protocol.get(url, options, (response) => {
        // Decode through the stream so that a multi-byte character split
        // across two chunks is not corrupted.
        response.setEncoding('utf8');

        let data = '';
        response.on('data', (chunk) => {
          data += chunk;
        });
        response.on('error', reject);
        response.on('end', () => {
          this.stats.responses++;
          resolve({
            statusCode: response.statusCode,
            headers: response.headers,
            body: data
          });
        });
      });

      request.on('error', reject);
      request.setTimeout(this.options.timeout, () => {
        reject(new Error('Request timeout'));
        request.destroy();
      });
    });
  }

  /**
   * Records the final outcome of one request.
   *
   * @param {number} duration Duration of the last attempt (ms).
   * @param {boolean} isError True when the request failed for good.
   */
  updateStats(duration, isError) {
    this.stats.requests++;

    if (!isError) {
      this.stats.totalResponseTime += duration;
      if (this.stats.responses > 0) {
        this.stats.averageResponseTime = this.stats.totalResponseTime / this.stats.responses;
      }
    }
  }

  /** @returns {object} Counters plus queue length and success rate in percent. */
  getStats() {
    return {
      ...this.stats,
      queueLength: this.requestQueue.length,
      successRate: this.stats.requests > 0 ? (this.stats.responses / this.stats.requests) * 100 : 0
    };
  }

  /**
   * Stops the batch tick, rejects requests that are still queued, destroys
   * both agents and emits `shutdown`.
   */
  shutdown() {
    if (this.batchProcessor) {
      clearInterval(this.batchProcessor);
      this.batchProcessor = null;
    }

    this.closed = true;

    // With the tick stopped nothing would ever dispatch these.
    for (const pending of this.requestQueue.splice(0)) {
      pending.reject(new Error('Connection pool has been shut down'));
    }

    this.httpAgent.destroy();
    this.httpsAgent.destroy();

    this.emit('shutdown');
  }
}
|
|
591
|
+
|
|
592
|
+
/**
 * Circuit Breaker Implementation
 *
 * Provides fault tolerance for external service calls using three states:
 *  - CLOSED: calls pass through; `failureThreshold` consecutive failures
 *    open the circuit.
 *  - OPEN: calls are rejected without running until `timeout` milliseconds
 *    have passed since the circuit opened.
 *  - HALF_OPEN: probe calls pass through; `successThreshold` consecutive
 *    successes close the circuit, any failure opens it again.
 *
 * Events emitted: `state_change` with `{ from, to }`.
 */
class CircuitBreaker extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.failureThreshold=5]
   * @param {number} [options.successThreshold=3]
   * @param {number} [options.timeout=60000] Milliseconds the circuit stays open.
   */
  constructor(options = {}) {
    super();

    this.options = {
      failureThreshold: 5,
      successThreshold: 3,
      timeout: 60000,
      ...options
    };

    this.state = 'CLOSED'; // CLOSED, OPEN, HALF_OPEN
    this.failures = 0;
    this.successes = 0;
    this.nextAttempt = 0;
    this.stats = {
      totalRequests: 0,
      totalFailures: 0,
      totalSuccesses: 0,
      stateChanges: 0
    };
  }

  /**
   * Runs `fn` under the breaker.
   *
   * @param {Function} fn Function returning a value or a promise.
   * @returns {Promise<*>} The result of `fn`.
   * @throws {Error} An error with `circuitBreakerOpen === true` while the
   *   circuit is open; otherwise whatever `fn` throws.
   */
  async execute(fn) {
    this.stats.totalRequests++;

    if (this.state === 'OPEN') {
      if (Date.now() < this.nextAttempt) {
        const error = new Error('Circuit breaker is OPEN');
        error.circuitBreakerOpen = true;
        throw error;
      }

      this.setState('HALF_OPEN');
    }

    try {
      const result = await fn();
      this.onSuccess();
      return result;
    } catch (error) {
      this.onFailure();
      throw error;
    }
  }

  /** Records a successful call; closes the circuit after enough probes. */
  onSuccess() {
    this.stats.totalSuccesses++;

    if (this.state === 'HALF_OPEN') {
      this.successes++;
      if (this.successes >= this.options.successThreshold) {
        this.setState('CLOSED');
      }
    }

    this.failures = 0;
  }

  /** Records a failed call; opens the circuit when warranted. */
  onFailure() {
    this.stats.totalFailures++;
    this.failures++;
    this.successes = 0;

    // A failed probe re-opens immediately. Relying on the failure counter
    // here is wrong because an earlier successful probe resets it to 0.
    if (this.state === 'HALF_OPEN' || this.failures >= this.options.failureThreshold) {
      this.setState('OPEN');
    }
  }

  /**
   * Moves to `newState`, updates the counters tied to that state and emits
   * `state_change`. Does nothing when the state is unchanged.
   *
   * @param {'CLOSED'|'OPEN'|'HALF_OPEN'} newState
   */
  setState(newState) {
    if (this.state === newState) return;

    const oldState = this.state;
    this.state = newState;
    this.stats.stateChanges++;

    if (newState === 'OPEN') {
      this.nextAttempt = Date.now() + this.options.timeout;
    } else if (newState === 'HALF_OPEN') {
      // Each probing phase counts its successes from zero.
      this.successes = 0;
    } else if (newState === 'CLOSED') {
      this.failures = 0;
      this.successes = 0;
    }

    this.emit('state_change', { from: oldState, to: newState });
  }

  /** @returns {'CLOSED'|'OPEN'|'HALF_OPEN'} Current state. */
  getState() {
    return this.state;
  }

  /** @returns {object} Counters, current state and failure rate in percent. */
  getStats() {
    const { totalRequests, totalFailures } = this.stats;

    return {
      ...this.stats,
      currentState: this.state,
      failures: this.failures,
      successes: this.successes,
      nextAttempt: this.nextAttempt,
      failureRate: totalRequests > 0 ? (totalFailures / totalRequests) * 100 : 0
    };
  }
}
|
|
698
|
+
|
|
699
|
+
/**
 * Performance Monitoring Dashboard
 *
 * Collects process memory samples on an interval, keeps a bounded history
 * per metric type and raises alerts when heap usage exceeds the configured
 * threshold.
 *
 * Events emitted: `monitoring_started`, `monitoring_stopped`,
 * `metrics_collected`, `alerts`.
 */
class PerformanceDashboard extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.sampleInterval=5000] Milliseconds between samples.
   * @param {number} [options.historySize=100] Entries kept per metric type.
   * @param {number} [options.maxAlerts=1000] Alerts kept in memory; older
   *   alerts are dropped first.
   * @param {object} [options.alertThresholds] Partial overrides are merged
   *   over the defaults (`memory`, `responseTime`, `errorRate`, `cpuUsage`).
   *   Only `memory` is evaluated by checkAlerts().
   */
  constructor(options = {}) {
    super();

    const { alertThresholds, ...otherOptions } = options;

    this.options = {
      sampleInterval: 5000, // 5 seconds
      historySize: 100,
      maxAlerts: 1000,
      ...otherOptions,
      // Merge per key: a caller overriding one threshold must not wipe out
      // the others, which would silently disable their alerts.
      alertThresholds: {
        memory: 450 * 1024 * 1024, // 450MB
        responseTime: 3000, // 3s
        errorRate: 0.05, // 5%
        cpuUsage: 0.8, // 80%
        ...alertThresholds
      }
    };

    this.metrics = {
      memory: [],
      responseTime: [],
      errorRate: [],
      throughput: []
    };

    this.alerts = [];
    this.monitoring = false;
    this.monitoringInterval = null;
  }

  /** Starts periodic sampling. A second call keeps the existing timer. */
  startMonitoring() {
    this.monitoring = true;

    if (!this.monitoringInterval) {
      this.monitoringInterval = setInterval(() => {
        this.collectMetrics();
      }, this.options.sampleInterval);
    }

    this.emit('monitoring_started');
  }

  /** Stops periodic sampling. */
  stopMonitoring() {
    this.monitoring = false;

    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
      this.monitoringInterval = null;
    }

    this.emit('monitoring_stopped');
  }

  /** Takes one sample, stores it, checks alerts and emits `metrics_collected`. */
  collectMetrics() {
    const timestamp = Date.now();
    const memUsage = process.memoryUsage();
    const cpuUsage = process.cpuUsage();

    // Collect memory metrics
    this.addMetric('memory', {
      timestamp,
      rss: memUsage.rss,
      heapUsed: memUsage.heapUsed,
      heapTotal: memUsage.heapTotal,
      external: memUsage.external
    });

    // Check for alerts
    this.checkAlerts({
      memory: memUsage.heapUsed,
      timestamp
    });

    this.emit('metrics_collected', {
      memory: memUsage,
      cpu: cpuUsage,
      timestamp
    });
  }

  /**
   * Appends a data point to a metric series, trimming it to `historySize`.
   *
   * @param {string} type Metric name; unknown names create a new series.
   * @param {*} data Data point.
   */
  addMetric(type, data) {
    if (!this.metrics[type]) {
      this.metrics[type] = [];
    }

    const series = this.metrics[type];
    series.push(data);

    // Keep only recent history
    if (series.length > this.options.historySize) {
      series.shift();
    }
  }

  /**
   * Compares current values against the thresholds; stores and emits any
   * resulting alerts.
   *
   * @param {{memory: number, timestamp: number}} currentMetrics
   */
  checkAlerts(currentMetrics) {
    const alerts = [];
    const memoryLimit = this.options.alertThresholds.memory;

    // Memory alert
    if (currentMetrics.memory > memoryLimit) {
      alerts.push({
        type: 'memory',
        severity: 'warning',
        message: 'Memory usage exceeded threshold: ' + (currentMetrics.memory / 1024 / 1024).toFixed(0) + 'MB',
        timestamp: currentMetrics.timestamp,
        value: currentMetrics.memory,
        threshold: memoryLimit
      });
    }

    if (alerts.length === 0) return;

    this.alerts.push(...alerts);

    // Bound the list: under sustained pressure every sample adds an alert.
    const overflow = this.alerts.length - this.options.maxAlerts;
    if (overflow > 0) {
      this.alerts.splice(0, overflow);
    }

    this.emit('alerts', alerts);
  }

  /**
   * @param {string} type Metric name.
   * @param {number} [limit=50] Maximum number of most recent entries.
   * @returns {Array} Most recent entries, oldest first; empty for unknown types.
   */
  getMetrics(type, limit = 50) {
    if (!this.metrics[type]) return [];
    return this.metrics[type].slice(-limit);
  }

  /**
   * @param {number} [limit=10]
   * @returns {Array} Most recent alerts, oldest first.
   */
  getRecentAlerts(limit = 10) {
    return this.alerts.slice(-limit);
  }

  /** @returns {object} Recent metrics, recent alerts, summary and process status. */
  getDashboardData() {
    return {
      metrics: {
        memory: this.getMetrics('memory', 20),
        responseTime: this.getMetrics('responseTime', 20),
        errorRate: this.getMetrics('errorRate', 20),
        throughput: this.getMetrics('throughput', 20)
      },
      alerts: this.getRecentAlerts(5),
      summary: this.getSummary(),
      status: {
        monitoring: this.monitoring,
        uptime: process.uptime(),
        nodeVersion: process.version
      }
    };
  }

  /**
   * @returns {object} Latest heap usage as text, the memory trend and counts
   *   of the alerts currently retained.
   */
  getSummary() {
    const memoryMetrics = this.getMetrics('memory', 10);
    const currentMemory = memoryMetrics.length > 0 ? memoryMetrics[memoryMetrics.length - 1] : null;

    return {
      currentMemory: currentMemory ? (currentMemory.heapUsed / 1024 / 1024).toFixed(0) + 'MB' : 'N/A',
      memoryTrend: this.calculateTrend(memoryMetrics.map((m) => m.heapUsed)),
      alertCount: this.alerts.length,
      criticalAlerts: this.alerts.filter((a) => a.severity === 'critical').length
    };
  }

  /**
   * Classifies the last (up to) five values by comparing the mean of the
   * older half with the mean of the newer half.
   *
   * @param {number[]} values
   * @returns {'increasing'|'decreasing'|'stable'} More than 10% change in
   *   either direction is reported as a trend.
   */
  calculateTrend(values) {
    if (values.length < 2) return 'stable';

    const recent = values.slice(-5);
    const half = Math.floor(recent.length / 2);
    const mean = (list) => list.reduce((a, b) => a + b, 0) / list.length;

    const avg1 = mean(recent.slice(0, half));
    const avg2 = mean(recent.slice(half));

    const change = (avg2 - avg1) / avg1;

    if (change > 0.1) return 'increasing';
    if (change < -0.1) return 'decreasing';
    return 'stable';
  }
}
|
|
861
|
+
|
|
862
|
+
// Worker thread implementation.
// Runs only when this module is loaded inside a worker that was started with
// `workerData.isWorker`; on the main thread the block is skipped entirely.
if (!isMainThread && workerData?.isWorker) {
  // Handlers for the supported task types, keyed by the `type` field of the
  // incoming message. Each one currently returns a fixed placeholder result.
  const taskHandlers = new Map([
    // CPU-intensive HTML parsing logic
    ['html_parse', async (data, options) => ({ parsed: true, elements: 100 })],
    // CPU-intensive content analysis logic
    ['content_analysis', async (data, options) => ({ analyzed: true, score: 0.85 })],
    // CPU-intensive text diffing logic; reads `data.original` and
    // `data.current`, so a missing payload is reported as a task error.
    ['text_diff', async (data, options) => {
      const { original, current } = data;
      return { changes: 5, similarity: 0.92 };
    }],
    // CPU-intensive hash calculation logic
    ['hash_calculation', async (data, options) => ({ hash: 'abc123', checksum: 'def456' })]
  ]);

  // Each message is answered with `{ taskId, result }` or `{ taskId, error }`.
  parentPort.on('message', async (message) => {
    const taskId = message?.taskId;

    try {
      // Validate inside the try block: a malformed message must produce an
      // error reply rather than reject this listener without answering.
      if (message === null || typeof message !== 'object') {
        throw new Error('Malformed task message');
      }

      const { type, data, options } = message;
      const handler = taskHandlers.get(type);
      if (!handler) {
        throw new Error('Unknown task type: ' + type);
      }

      const result = await handler(data, options);
      parentPort.postMessage({ taskId, result });
    } catch (error) {
      parentPort.postMessage({ taskId, error: error.message });
    }
  });
}
|
|
916
|
+
|
|
917
|
+
// Export all optimization utilities
|
|
918
|
+
export {
|
|
919
|
+
MemoryLeakMonitor,
|
|
920
|
+
WorkerPoolManager,
|
|
921
|
+
ConnectionPoolManager,
|
|
922
|
+
CircuitBreaker,
|
|
923
|
+
PerformanceDashboard as DashboardMonitor
|
|
924
|
+
};
|
|
925
|
+
|
|
926
|
+
export default {
|
|
927
|
+
MemoryLeakMonitor,
|
|
928
|
+
WorkerPoolManager,
|
|
929
|
+
ConnectionPoolManager,
|
|
930
|
+
CircuitBreaker,
|
|
931
|
+
PerformanceDashboard
|
|
932
|
+
};
|