crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ErrorHandlingConfig - Centralized configuration for error handling systems
|
|
3
|
+
* Provides default configurations and factory methods for error handling components
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { RetryManager } from './RetryManager.js';
|
|
7
|
+
import { createCircuitBreaker } from './CircuitBreaker.js';
|
|
8
|
+
import { createLogger } from './Logger.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Preset error handling settings, keyed by service type.
 *
 * Every preset has three sections: `retry` (handed to RetryManager),
 * `circuitBreaker` (handed to createCircuitBreaker) and `logger` (handed to
 * createLogger). Key order is significant only for the "Available types"
 * list printed by ErrorHandlingFactory.createSuite.
 */
export const DEFAULT_CONFIGS = (() => {
  // The builders below return a fresh array/object on every call so that no
  // two presets share a mutable reference.
  const connectionErrors = (...extra) => ['ECONNRESET', 'ENOTFOUND', 'ECONNREFUSED', 'ETIMEDOUT', ...extra];
  const httpStatuses = (...extra) => [408, 429, 500, 502, 503, 504, ...extra];
  const EXTENDED_STATUSES = [520, 521, 522, 523, 524];

  // Retry section for the presets that use exponential backoff with jitter.
  const exponentialRetry = (maxRetries, baseDelay, maxDelay, retryableErrors, retryableStatusCodes) => ({
    maxRetries,
    baseDelay,
    maxDelay,
    strategy: 'exponential',
    jitter: true,
    retryableErrors,
    retryableStatusCodes,
  });

  // Logger section; error tracking is enabled in every preset.
  const loggerSettings = ({
    level = 'info',
    enableRequestTracking = true,
    enablePerformanceTracking = true,
  } = {}) => ({
    level,
    enableRequestTracking,
    enablePerformanceTracking,
    enableErrorTracking: true,
  });

  return {
    // Web scraping operations
    scraping: {
      retry: exponentialRetry(3, 1000, 30000, connectionErrors('EPIPE'), httpStatuses(...EXTENDED_STATUSES)),
      circuitBreaker: {
        threshold: 5,
        timeout: 30000,
        resetTimeout: 60000,
        halfOpenMaxCalls: 3,
        errorThresholdPercentage: 50,
        minimumThroughput: 10,
      },
      logger: loggerSettings(),
    },

    // API operations (Google Search, etc.)
    api: {
      retry: exponentialRetry(4, 1000, 16000, connectionErrors(), httpStatuses(...EXTENDED_STATUSES)),
      circuitBreaker: {
        threshold: 5,
        timeout: 30000,
        resetTimeout: 120000,
        halfOpenMaxCalls: 3,
        errorThresholdPercentage: 50,
        minimumThroughput: 20,
      },
      logger: loggerSettings(),
    },

    // Network operations (general HTTP requests)
    network: {
      retry: exponentialRetry(3, 1000, 30000, connectionErrors('EPIPE'), httpStatuses()),
      circuitBreaker: {
        threshold: 3,
        timeout: 10000,
        resetTimeout: 60000,
        halfOpenMaxCalls: 2,
        errorThresholdPercentage: 40,
        minimumThroughput: 10,
      },
      logger: loggerSettings({ enablePerformanceTracking: false }),
    },

    // File processing operations
    processing: {
      retry: {
        maxRetries: 2,
        baseDelay: 2000,
        maxDelay: 60000,
        strategy: 'linear',
        jitter: false,
        retryableErrors: ['EMFILE', 'ENFILE', 'EBUSY', 'ENOENT'],
        retryableStatusCodes: [],
      },
      circuitBreaker: {
        threshold: 10,
        timeout: 60000,
        resetTimeout: 300000,
        halfOpenMaxCalls: 5,
        errorThresholdPercentage: 70,
        minimumThroughput: 5,
      },
      logger: loggerSettings({ enableRequestTracking: false }),
    },

    // Critical operations (require high reliability)
    critical: {
      retry: exponentialRetry(5, 500, 10000, connectionErrors('EPIPE'), httpStatuses(...EXTENDED_STATUSES)),
      circuitBreaker: {
        threshold: 3,
        timeout: 15000,
        resetTimeout: 30000,
        halfOpenMaxCalls: 2,
        errorThresholdPercentage: 30,
        minimumThroughput: 5,
      },
      logger: loggerSettings({ level: 'debug' }),
    },
  };
})();
|
|
149
|
+
|
|
150
|
+
/**
 * Factory for building error handling components (retry manager, circuit
 * breaker, logger) from the presets in DEFAULT_CONFIGS.
 */
export class ErrorHandlingFactory {
  /**
   * Create a complete error handling suite for a service type.
   *
   * Overrides are merged shallowly, section by section: passing
   * `overrides.retry` replaces the preset's entire `retry` object instead of
   * being merged into it.
   *
   * @param {string} serviceType - Type of service (scraping, api, network, processing, critical)
   * @param {Object} overrides - Configuration overrides keyed by section (retry, circuitBreaker, logger)
   * @returns {Object} Error handling components: { retryManager, circuitBreaker, logger }
   * @throws {Error} If serviceType is not one of the keys of DEFAULT_CONFIGS
   */
  static createSuite(serviceType = 'scraping', overrides = {}) {
    // Validate against the preset table itself, before merging. Testing the
    // merged object can never fail because an object literal is always
    // truthy. The own-property check also rejects inherited names such as
    // "toString".
    if (!Object.prototype.hasOwnProperty.call(DEFAULT_CONFIGS, serviceType)) {
      throw new Error(`Unknown service type: ${serviceType}. Available types: ${Object.keys(DEFAULT_CONFIGS).join(', ')}`);
    }

    const config = { ...DEFAULT_CONFIGS[serviceType], ...overrides };

    return {
      retryManager: new RetryManager(config.retry),
      circuitBreaker: createCircuitBreaker('default', config.circuitBreaker),
      logger: createLogger('default', config.logger)
    };
  }

  /**
   * Create retry manager with preset configuration.
   *
   * An unrecognized serviceType is tolerated: the result is then built from
   * the overrides alone.
   *
   * @param {string} serviceType - Service type
   * @param {Object} overrides - Retry settings layered over the preset's `retry` section
   * @returns {RetryManager} Configured retry manager
   */
  static createRetryManager(serviceType = 'scraping', overrides = {}) {
    const config = { ...DEFAULT_CONFIGS[serviceType]?.retry, ...overrides };
    return new RetryManager(config);
  }

  /**
   * Create circuit breaker with preset configuration.
   *
   * An unrecognized serviceType is tolerated: the result is then built from
   * the overrides alone.
   *
   * @param {string} serviceType - Service type
   * @param {Object} overrides - Settings layered over the preset's `circuitBreaker` section
   * @returns {CircuitBreaker} Configured circuit breaker
   */
  static createCircuitBreaker(serviceType = 'scraping', overrides = {}) {
    const config = { ...DEFAULT_CONFIGS[serviceType]?.circuitBreaker, ...overrides };
    // Inside the class body this resolves to the imported module-level
    // function, not to this static method.
    return createCircuitBreaker('default', config);
  }

  /**
   * Create logger with preset configuration.
   *
   * An unrecognized serviceType is tolerated: the result is then built from
   * the overrides alone.
   *
   * @param {string} serviceType - Service type
   * @param {Object} overrides - Settings layered over the preset's `logger` section
   * @returns {Logger} Configured logger
   */
  static createLogger(serviceType = 'scraping', overrides = {}) {
    const config = { ...DEFAULT_CONFIGS[serviceType]?.logger, ...overrides };
    // Resolves to the imported module-level createLogger function.
    return createLogger('default', config);
  }
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Global error handling configuration manager
|
|
210
|
+
*/
|
|
211
|
+
export class ErrorHandlingConfigManager {
|
|
212
|
+
constructor() {
|
|
213
|
+
this.configs = new Map();
|
|
214
|
+
this.instances = new Map();
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Register a custom configuration for a service
|
|
219
|
+
* @param {string} serviceName - Service name
|
|
220
|
+
* @param {Object} config - Error handling configuration
|
|
221
|
+
*/
|
|
222
|
+
registerConfig(serviceName, config) {
|
|
223
|
+
this.configs.set(serviceName, config);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* Get error handling suite for a service
|
|
228
|
+
* @param {string} serviceName - Service name
|
|
229
|
+
* @param {string} serviceType - Default service type if not registered
|
|
230
|
+
* @returns {Object} Error handling components
|
|
231
|
+
*/
|
|
232
|
+
getSuite(serviceName, serviceType = 'scraping') {
|
|
233
|
+
if (this.instances.has(serviceName)) {
|
|
234
|
+
return this.instances.get(serviceName);
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
const config = this.configs.get(serviceName);
|
|
238
|
+
const suite = config
|
|
239
|
+
? ErrorHandlingFactory.createSuite('scraping', config)
|
|
240
|
+
: ErrorHandlingFactory.createSuite(serviceType);
|
|
241
|
+
|
|
242
|
+
this.instances.set(serviceName, suite);
|
|
243
|
+
return suite;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/**
|
|
247
|
+
* Clear cached instances (useful for testing)
|
|
248
|
+
*/
|
|
249
|
+
clearCache() {
|
|
250
|
+
// Cleanup existing instances
|
|
251
|
+
for (const suite of this.instances.values()) {
|
|
252
|
+
if (suite.circuitBreaker && typeof suite.circuitBreaker.destroy === 'function') {
|
|
253
|
+
suite.circuitBreaker.destroy();
|
|
254
|
+
}
|
|
255
|
+
if (suite.logger && typeof suite.logger.close === 'function') {
|
|
256
|
+
suite.logger.close();
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
this.instances.clear();
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
/**
|
|
264
|
+
* Get statistics for all registered services
|
|
265
|
+
* @returns {Object} Statistics for all services
|
|
266
|
+
*/
|
|
267
|
+
getAllStats() {
|
|
268
|
+
const stats = {};
|
|
269
|
+
|
|
270
|
+
for (const [serviceName, suite] of this.instances.entries()) {
|
|
271
|
+
stats[serviceName] = {
|
|
272
|
+
retry: suite.retryManager?.getStats(),
|
|
273
|
+
circuitBreaker: suite.circuitBreaker?.getStats(),
|
|
274
|
+
logger: suite.logger?.getStats()
|
|
275
|
+
};
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
return stats;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// Global instance: the module-level manager that getErrorHandling() and
// registerErrorHandlingConfig() operate on.
export const errorHandlingConfig = new ErrorHandlingConfigManager();
|
|
284
|
+
|
|
285
|
+
/**
 * Look up (or lazily build) the error handling suite for a service using the
 * module-level errorHandlingConfig manager.
 * @param {string} serviceName - Service name
 * @param {string} serviceType - Service type forwarded to the manager's getSuite
 * @returns {Object} Error handling components
 */
export function getErrorHandling(serviceName, serviceType = 'scraping') {
  const suite = errorHandlingConfig.getSuite(serviceName, serviceType);
  return suite;
}
|
|
294
|
+
|
|
295
|
+
/**
 * Register a custom error handling configuration on the module-level
 * errorHandlingConfig manager.
 * @param {string} serviceName - Service name
 * @param {Object} config - Configuration object
 */
export function registerErrorHandlingConfig(serviceName, config) {
  const manager = errorHandlingConfig;
  manager.registerConfig(serviceName, config);
}
|
|
303
|
+
|
|
304
|
+
/**
 * Method decorator factory that wraps a class method with request logging,
 * retries and a circuit breaker.
 *
 * The suite is resolved once, when the decorator is applied, not on each
 * call. The wrapped method always returns a promise.
 *
 * @param {string} serviceName - Service name for error handling
 * @param {string} serviceType - Service type
 * @returns {Function} Decorator function taking (target, propertyKey, descriptor)
 */
export function withErrorHandling(serviceName, serviceType = 'scraping') {
  return function(target, propertyKey, descriptor) {
    const wrapped = descriptor.value;
    const handlers = getErrorHandling(serviceName, serviceType);

    descriptor.value = async function(...callArgs) {
      // Only the argument count is logged, never the argument values.
      const requestMeta = {
        method: propertyKey,
        service: serviceName,
        args: callArgs.length
      };
      const requestId = handlers.logger.startRequest(requestMeta);

      // Arrow function keeps the caller's `this` for the original method.
      const invoke = () => wrapped.apply(this, callArgs);

      let outcome;
      try {
        outcome = await handlers.retryManager.executeWithCircuitBreaker(
          invoke,
          handlers.circuitBreaker,
          serviceName,
          { method: propertyKey }
        );

        handlers.logger.endRequest(requestId, { success: true });
      } catch (failure) {
        // Record the failure, then surface the same error object to the caller.
        handlers.logger.requestError(requestId, failure, { method: propertyKey });
        throw failure;
      }

      return outcome;
    };

    return descriptor;
  };
}
|
|
341
|
+
|
|
342
|
+
// Default export: the same ErrorHandlingFactory class that is also exported by name.
export default ErrorHandlingFactory;
|