crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,601 @@
1
+ /**
2
+ * AlertNotificationSystem - Enhanced notification system for change tracking
3
+ * Supports email, webhook, and Slack notifications with throttling and aggregation
4
+ */
5
+
6
+ import { EventEmitter } from 'events';
7
+ import fetch from 'node-fetch';
8
+ import crypto from 'crypto';
9
+
10
/**
 * Queue-based alert dispatcher for change tracking.
 *
 * Notifications passed to {@link AlertNotificationSystem#sendNotification} are
 * optionally throttled, optionally aggregated per `aggregateKey`, and otherwise
 * placed on an in-memory queue that a timer drains one item per tick. Each item
 * is delivered through the webhook, email or Slack sender and retried with a
 * linearly growing delay when delivery throws.
 *
 * Events emitted: `notificationQueued`, `notificationThrottled`,
 * `notificationAggregated`, `notificationProcessed`, `notificationRetry`,
 * `notificationFailed`, `webhookSent`, `webhookError`, `emailRequested`,
 * `emailError`, `slackSent`, `slackError`, and `error` (only when an `error`
 * listener is registered).
 */
export class AlertNotificationSystem extends EventEmitter {
  /**
   * @param {Object} [options] - Overrides for the defaults below
   * @param {boolean} [options.throttlingEnabled=true] - Honor per-notification `throttle`
   * @param {boolean} [options.aggregationEnabled=true] - Honor per-notification `aggregateKey`
   * @param {number} [options.retryAttempts=3] - Total delivery attempts per notification
   * @param {number} [options.retryDelay=5000] - Base retry delay in ms (multiplied by the attempt number)
   * @param {number} [options.aggregationWindow=300000] - How long alerts sharing a key are buffered, in ms
   * @param {number} [options.queueInterval=1000] - Queue polling period in ms
   * @param {number} [options.requestTimeout=30000] - Value passed as `timeout` to fetch
   */
  constructor(options = {}) {
    super();

    this.options = {
      throttlingEnabled: true,
      aggregationEnabled: true,
      retryAttempts: 3,
      retryDelay: 5000,
      // NOTE(review): nothing in this class reads signatureSecret; webhook signing
      // uses config.signingSecret. The literal fallback is publicly known and must
      // not be relied on as a signing key.
      signatureSecret: process.env.WEBHOOK_SECRET || 'default-secret',
      aggregationWindow: 300000,
      queueInterval: 1000,
      requestTimeout: 30000,
      ...options
    };

    // Notification queues and throttling
    this.notificationQueue = [];
    this.throttleCache = new Map(); // throttle key -> last successful send (ms)
    this.alertAggregation = new Map(); // aggregate key -> { notifications, firstSeen, lastSeen, timer }
    this.retryQueue = new Map(); // notification id -> pending retry timer

    // Statistics
    this.stats = {
      totalNotifications: 0,
      successfulNotifications: 0,
      failedNotifications: 0,
      throttledNotifications: 0,
      aggregatedNotifications: 0,
      webhooksSent: 0,
      emailsSent: 0,
      slackMessagesSent: 0
    };

    // Handle of the queue polling interval; null while stopped
    this.queueTimer = null;

    // Start processing queue
    this.startQueueProcessor();
  }

  /**
   * Send notification with throttling and aggregation.
   *
   * Never rejects: failures are reported through the returned object.
   *
   * @param {Object} notification - Notification configuration
   * @param {string} notification.type - `webhook`, `email` or `slack`
   * @param {string} [notification.target] - Destination identifier used for throttling
   * @param {Object} [notification.config] - Channel configuration handed to the sender
   * @param {Object} [notification.data] - Alert payload
   * @param {number} [notification.throttle=0] - Minimum ms between sends to the same destination
   * @param {string|null} [notification.aggregateKey=null] - Buffer alerts sharing this key
   * @returns {Promise<{success: boolean, reason: string, error?: string}>}
   */
  async sendNotification(notification) {
    try {
      const { throttle = 0, aggregateKey = null } = notification;

      // Check throttling
      if (this.options.throttlingEnabled && throttle > 0) {
        const throttleKey = this.getThrottleKey(notification);
        const lastSent = this.throttleCache.get(throttleKey);

        if (lastSent !== undefined && Date.now() - lastSent < throttle) {
          this.stats.throttledNotifications++;
          this.emit('notificationThrottled', { notification, throttleKey });
          return { success: false, reason: 'throttled' };
        }
      }

      // Check aggregation
      if (this.options.aggregationEnabled && aggregateKey) {
        const result = this.handleAggregation(notification, aggregateKey);
        if (result.aggregated) {
          this.stats.aggregatedNotifications++;
          this.emit('notificationAggregated', { notification, aggregateKey });
          return { success: true, reason: 'aggregated' };
        }
      }

      this.enqueue(notification);

      return { success: true, reason: 'queued' };
    } catch (error) {
      this.emitError('sendNotification', error);
      return { success: false, reason: 'error', error: error.message };
    }
  }

  /**
   * Put a notification on the delivery queue with a fresh id and attempt counter.
   * @param {Object} notification - Notification to queue
   */
  enqueue(notification) {
    this.notificationQueue.push({
      ...notification,
      id: this.generateNotificationId(),
      timestamp: Date.now(),
      attempts: 0
    });

    this.stats.totalNotifications++;
    this.emit('notificationQueued', notification);
  }

  /**
   * Build the throttle cache key for a notification.
   *
   * The same function is used when checking and when recording a send, so the
   * two sides can never disagree about the key.
   *
   * @param {Object} notification - Notification being throttled
   * @returns {string} - Throttle cache key
   */
  getThrottleKey(notification) {
    const { type, target, config, aggregateKey } = notification;
    const destination =
      target ??
      config?.url ??
      config?.webhookUrl ??
      config?.recipients?.[0] ??
      config?.channel;

    return `${type}_${destination}_${aggregateKey || 'default'}`;
  }

  /**
   * Create a queue id for a notification.
   * @returns {string} - Identifier of the form `notification_<ms>_<random>`
   */
  generateNotificationId() {
    return `notification_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Emit an `error` event only when someone is listening.
   *
   * EventEmitter throws on an unhandled `error` event, which would turn a
   * reported failure into an exception inside the reporting code path.
   *
   * @param {string} operation - Name of the failing operation
   * @param {Error} error - Error that occurred
   */
  emitError(operation, error) {
    if (this.listenerCount('error') > 0) {
      this.emit('error', { operation, error: error.message });
    }
  }

  /**
   * Send webhook notification.
   *
   * @param {Object} config - Webhook configuration
   * @param {string} config.url - Destination URL
   * @param {string} [config.method='POST'] - HTTP method
   * @param {Object} [config.headers={}] - Extra request headers
   * @param {string} [config.signingSecret] - When set, an HMAC-SHA256 `X-Signature` header is added
   * @param {boolean} [config.includeContent=false] - Send `data` as-is instead of the sanitized subset
   * @param {Object} data - Notification data
   * @returns {Promise<{success: boolean, status: number}>}
   * @throws {Error} When the URL is missing, the request fails, or the response is not ok
   */
  async sendWebhookNotification(config, data) {
    try {
      const {
        url,
        method = 'POST',
        headers = {},
        signingSecret,
        includeContent = false
      } = config ?? {};

      if (!url) {
        throw new Error('Webhook notification requires config.url');
      }

      // Prepare payload
      const payload = {
        event: 'change_alert',
        timestamp: Date.now(),
        data: includeContent ? data : this.sanitizeData(data)
      };

      const body = JSON.stringify(payload);

      const requestHeaders = {
        'Content-Type': 'application/json',
        'User-Agent': 'CrawlForge-AlertSystem/3.0',
        ...headers
      };

      // Generate signature if secret provided
      if (signingSecret) {
        requestHeaders['X-Signature'] = this.generateSignature(body, signingSecret);
      }

      // NOTE(review): `timeout` is a node-fetch v2 option — confirm the installed
      // node-fetch major version still honors it.
      const response = await fetch(url, {
        method,
        headers: requestHeaders,
        body,
        timeout: this.options.requestTimeout
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      this.stats.webhooksSent++;
      this.stats.successfulNotifications++;

      this.emit('webhookSent', {
        url,
        status: response.status,
        data: payload
      });

      return { success: true, status: response.status };
    } catch (error) {
      this.stats.failedNotifications++;
      this.emit('webhookError', {
        url: config?.url,
        error: error.message
      });
      throw error;
    }
  }

  /**
   * Send email notification (placeholder for integration).
   *
   * No mail is sent here: an `emailRequested` event carrying the rendered
   * message is emitted for an external handler.
   *
   * @param {Object} config - Email configuration
   * @param {Array<string>} config.recipients - Recipient addresses
   * @param {string} [config.subject='Content Change Alert'] - Subject line
   * @param {boolean} [config.includeDetails=true] - Append the details section
   * @param {Object} data - Notification data
   * @returns {Promise<{success: boolean, message: string}>}
   * @throws {Error} When rendering the message or an event listener throws
   */
  async sendEmailNotification(config, data) {
    try {
      const {
        recipients,
        subject = 'Content Change Alert',
        includeDetails = true
      } = config ?? {};

      // Email integration would go here
      // For now, just emit event for external handling
      const emailData = {
        to: recipients,
        subject,
        body: this.generateEmailBody(data, includeDetails),
        timestamp: Date.now()
      };

      this.emit('emailRequested', emailData);

      this.stats.emailsSent++;
      this.stats.successfulNotifications++;

      return { success: true, message: 'Email queued for external handling' };
    } catch (error) {
      this.stats.failedNotifications++;
      this.emit('emailError', {
        recipients: config?.recipients,
        error: error.message
      });
      throw error;
    }
  }

  /**
   * Send Slack notification through an incoming-webhook URL.
   *
   * @param {Object} config - Slack configuration
   * @param {string} config.webhookUrl - Slack webhook URL
   * @param {string} [config.channel] - Channel override
   * @param {string} [config.username='Change Tracker'] - Display name
   * @param {string} [config.iconEmoji=':warning:'] - Icon emoji
   * @param {Object} data - Notification data
   * @returns {Promise<{success: boolean}>}
   * @throws {Error} When the URL is missing, the request fails, or the response is not ok
   */
  async sendSlackNotification(config, data) {
    try {
      const {
        webhookUrl,
        channel,
        username = 'Change Tracker',
        iconEmoji = ':warning:'
      } = config ?? {};

      if (!webhookUrl) {
        throw new Error('Slack notification requires config.webhookUrl');
      }

      const payload = {
        channel,
        username,
        icon_emoji: iconEmoji,
        text: this.generateSlackMessage(data),
        attachments: this.generateSlackAttachments(data)
      };

      const response = await fetch(webhookUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify(payload),
        timeout: this.options.requestTimeout
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Slack API error: ${response.status} - ${errorText}`);
      }

      this.stats.slackMessagesSent++;
      this.stats.successfulNotifications++;

      this.emit('slackSent', {
        channel,
        message: payload.text
      });

      return { success: true };
    } catch (error) {
      this.stats.failedNotifications++;
      this.emit('slackError', {
        channel: config?.channel,
        error: error.message
      });
      throw error;
    }
  }

  /**
   * Start the timer that drains the notification queue, one item per tick.
   * Calling it while the timer is already running is a no-op.
   */
  startQueueProcessor() {
    if (this.queueTimer) return;

    this.queueTimer = setInterval(() => {
      // The promise is not awaited by the timer, so failures are routed explicitly.
      this.processNextNotification().catch((error) => {
        this.emitError('queueProcessor', error);
      });
    }, this.options.queueInterval);
  }

  /**
   * Stop the queue timer. Queued notifications stay in the queue.
   */
  stopQueueProcessor() {
    if (this.queueTimer) {
      clearInterval(this.queueTimer);
      this.queueTimer = null;
    }
  }

  /**
   * Take the oldest queued notification, deliver it, and hand delivery
   * failures to the retry logic.
   * @returns {Promise<void>}
   */
  async processNextNotification() {
    const notification = this.notificationQueue.shift();
    if (!notification) return;

    try {
      await this.processNotification(notification);
    } catch (error) {
      await this.handleNotificationFailure(notification, error);
    }
  }

  /**
   * Process individual notification.
   *
   * @param {Object} notification - Notification to process
   * @returns {Promise<void>}
   * @throws {Error} When the type is unknown or the channel sender throws
   */
  async processNotification(notification) {
    const { type, config, data } = notification;

    switch (type) {
      case 'webhook':
        await this.sendWebhookNotification(config, data);
        break;
      case 'email':
        await this.sendEmailNotification(config, data);
        break;
      case 'slack':
        await this.sendSlackNotification(config, data);
        break;
      default:
        throw new Error(`Unknown notification type: ${type}`);
    }

    // Record the send so later notifications to the same destination are throttled
    if (notification.throttle && notification.throttle > 0) {
      this.throttleCache.set(this.getThrottleKey(notification), Date.now());
    }

    this.emit('notificationProcessed', notification);
  }

  /**
   * Handle notification failure with retry logic.
   *
   * While attempts remain, the notification is put back on the queue after
   * `retryDelay * attempts` ms; the pending timer is tracked in `retryQueue`
   * so `clear()` can cancel it.
   *
   * @param {Object} notification - Failed notification (its `attempts` is incremented)
   * @param {Error} error - Error that occurred
   * @returns {Promise<void>}
   */
  async handleNotificationFailure(notification, error) {
    notification.attempts = (notification.attempts || 0) + 1;

    if (notification.attempts < this.options.retryAttempts) {
      if (!notification.id) {
        notification.id = this.generateNotificationId();
      }
      const retryId = notification.id;

      const timer = setTimeout(() => {
        this.retryQueue.delete(retryId);
        this.notificationQueue.push(notification);
      }, this.options.retryDelay * notification.attempts);
      this.retryQueue.set(retryId, timer);

      this.emit('notificationRetry', {
        notification,
        attempt: notification.attempts,
        error: error.message
      });
    } else {
      // Max retries exceeded.
      // NOTE: the channel senders also increment failedNotifications on every
      // failed attempt, so this counter is attempts plus final failures.
      this.stats.failedNotifications++;

      this.emit('notificationFailed', {
        notification,
        error: error.message,
        finalAttempt: true
      });
    }
  }

  /**
   * Handle notification aggregation.
   *
   * The first alert for a key opens a window of `options.aggregationWindow` ms
   * and schedules a flush at its end; every alert for that key is buffered
   * until then, so each alert is delivered exactly once as part of one
   * aggregated notification.
   *
   * @param {Object} notification - Notification to aggregate
   * @param {string} aggregateKey - Aggregation key
   * @returns {Object} - Aggregation result (`{ aggregated: true }` when buffered)
   */
  handleAggregation(notification, aggregateKey) {
    const now = Date.now();
    const aggregationWindow = this.options.aggregationWindow;

    // If the flush timer is overdue, close the old window before opening a new one
    const stale = this.alertAggregation.get(aggregateKey);
    if (stale && now - stale.firstSeen > aggregationWindow) {
      this.flushAggregation(aggregateKey);
    }

    let aggregate = this.alertAggregation.get(aggregateKey);
    if (!aggregate) {
      aggregate = {
        notifications: [],
        firstSeen: now,
        lastSeen: now,
        timer: setTimeout(() => this.flushAggregation(aggregateKey), aggregationWindow)
      };
      this.alertAggregation.set(aggregateKey, aggregate);
    }

    aggregate.notifications.push(notification);
    aggregate.lastSeen = now;

    return { aggregated: true };
  }

  /**
   * Close the aggregation window for a key and queue the combined notification,
   * so it gets the same delivery and retry handling as any other notification.
   *
   * @param {string} aggregateKey - Aggregation key
   * @returns {boolean} - True when an aggregated notification was queued
   */
  flushAggregation(aggregateKey) {
    const aggregate = this.alertAggregation.get(aggregateKey);
    if (!aggregate) return false;

    clearTimeout(aggregate.timer);
    this.alertAggregation.delete(aggregateKey);

    try {
      this.enqueue(this.buildAggregatedNotification(aggregate));
      return true;
    } catch (error) {
      // Runs inside a timer callback, so it must not throw
      this.emitError('flushAggregation', error);
      return false;
    }
  }

  /**
   * Combine buffered alerts into one notification that reuses the first
   * alert's type and configuration.
   *
   * @param {Object} aggregate - Aggregated data (`notifications`, `firstSeen`, `lastSeen`)
   * @returns {Object} - Notification carrying the aggregated data
   */
  buildAggregatedNotification(aggregate) {
    const { notifications } = aggregate;
    const firstNotification = notifications[0];

    return {
      ...firstNotification,
      data: {
        ...firstNotification.data,
        aggregatedCount: notifications.length,
        timeSpan: {
          start: aggregate.firstSeen,
          end: aggregate.lastSeen
        },
        summary: this.generateAggregatedSummary(notifications)
      },
      aggregated: true
    };
  }

  /**
   * Send aggregated notification immediately, bypassing the queue.
   *
   * @param {string} aggregateKey - Aggregation key
   * @param {Object} aggregate - Aggregated data
   * @returns {Promise<void>}
   * @throws {Error} When delivery fails
   */
  async sendAggregatedNotification(aggregateKey, aggregate) {
    await this.processNotification(this.buildAggregatedNotification(aggregate));
  }

  /**
   * Generate aggregated summary.
   *
   * @param {Array} notifications - Notifications to summarize
   * @returns {Object} - Unique URLs plus per-significance and per-change-type counts
   */
  generateAggregatedSummary(notifications) {
    const urls = new Set();
    const significanceLevels = {};
    const changeTypes = {};

    for (const notification of notifications) {
      const { url, significance, changeType } = notification.data ?? {};
      urls.add(url);
      significanceLevels[significance] = (significanceLevels[significance] || 0) + 1;
      changeTypes[changeType] = (changeTypes[changeType] || 0) + 1;
    }

    return {
      uniqueUrls: urls.size,
      urls: Array.from(urls),
      significanceDistribution: significanceLevels,
      changeTypeDistribution: changeTypes,
      totalChanges: notifications.length
    };
  }

  /**
   * Generate signature for webhook security.
   *
   * @param {string} body - Request body
   * @param {string} secret - Signing secret
   * @returns {string} - Hex-encoded HMAC-SHA256 of the body
   */
  generateSignature(body, secret) {
    return crypto
      .createHmac('sha256', secret)
      .update(body, 'utf8')
      .digest('hex');
  }

  /**
   * Sanitize data for external transmission by keeping only the summary fields.
   *
   * @param {Object} data - Data to sanitize
   * @returns {Object} - Sanitized data
   */
  sanitizeData(data) {
    const source = data ?? {};
    return {
      url: source.url,
      significance: source.significance,
      changeType: source.changeType,
      timestamp: source.timestamp,
      summary: source.summary
    };
  }

  /**
   * Convert an alert timestamp to epoch milliseconds.
   *
   * @param {number|string|Date} [value] - Timestamp from the alert data
   * @returns {number} - Epoch ms; the current time when the value is missing or unparseable
   */
  resolveTimestamp(value) {
    const parsed = value == null ? Number.NaN : new Date(value).getTime();
    return Number.isNaN(parsed) ? Date.now() : parsed;
  }

  /**
   * Generate email body.
   *
   * Missing fields are rendered as `unknown` (or skipped) instead of throwing.
   *
   * @param {Object} data - Notification data
   * @param {boolean} includeDetails - Include detailed information
   * @returns {string} - Email body
   */
  generateEmailBody(data, includeDetails) {
    const alert = data ?? {};
    const significance = String(alert.significance ?? 'unknown').toUpperCase();
    // Replace every underscore, not just the first one
    const changeType = String(alert.changeType ?? 'unknown').replace(/_/g, ' ');
    const time = new Date(this.resolveTimestamp(alert.timestamp)).toISOString();

    let body = `Content Change Alert\n\n`;
    body += `URL: ${alert.url}\n`;
    body += `Significance: ${significance}\n`;
    body += `Change Type: ${changeType}\n`;
    body += `Time: ${time}\n\n`;

    // Aggregated alerts carry a summary object without changeDescription
    const description = alert.summary?.changeDescription;
    if (description) {
      body += `Summary:\n${description}\n\n`;
    }

    if (includeDetails && alert.details) {
      const { similarity, addedElements, removedElements, modifiedElements } = alert.details;
      body += `Details:\n`;
      if (Number.isFinite(similarity)) {
        body += `- Similarity: ${Math.round(similarity * 100)}%\n`;
      }
      body += `- Changes: ${addedElements?.length || 0} added, `;
      body += `${removedElements?.length || 0} removed, `;
      body += `${modifiedElements?.length || 0} modified\n`;
    }

    body += `\nGenerated by CrawlForge Change Tracker`;

    return body;
  }

  /**
   * Generate Slack message.
   *
   * @param {Object} data - Notification data
   * @returns {string} - Slack message
   */
  generateSlackMessage(data) {
    const alert = data ?? {};
    const emoji = this.getSignificanceEmoji(alert.significance);
    return `${emoji} Content change detected on ${alert.url}`;
  }

  /**
   * Generate Slack attachments.
   *
   * @param {Object} data - Notification data
   * @returns {Array} - Slack attachments
   */
  generateSlackAttachments(data) {
    const alert = data ?? {};

    return [{
      color: this.getSignificanceColor(alert.significance),
      fields: [
        {
          title: 'URL',
          value: alert.url,
          short: false
        },
        {
          title: 'Significance',
          value: String(alert.significance ?? 'unknown').toUpperCase(),
          short: true
        },
        {
          title: 'Change Type',
          value: String(alert.changeType ?? 'unknown').replace(/_/g, ' '),
          short: true
        },
        {
          title: 'Summary',
          value: alert.summary?.changeDescription || 'Change detected',
          short: false
        }
      ],
      footer: 'CrawlForge Change Tracker',
      // Slack expects seconds, the alert data carries milliseconds
      ts: Math.floor(this.resolveTimestamp(alert.timestamp) / 1000)
    }];
  }

  /**
   * Get emoji for significance level.
   *
   * @param {string} significance - Significance level
   * @returns {string} - Emoji (`:grey_question:` for unknown levels)
   */
  getSignificanceEmoji(significance) {
    const emojis = {
      'none': ':white_circle:',
      'minor': ':yellow_circle:',
      'moderate': ':orange_circle:',
      'major': ':red_circle:',
      'critical': ':rotating_light:'
    };
    return emojis[significance] || ':grey_question:';
  }

  /**
   * Get color for significance level.
   *
   * @param {string} significance - Significance level
   * @returns {string} - Color code (`#9e9e9e` for unknown levels)
   */
  getSignificanceColor(significance) {
    const colors = {
      'none': '#36a64f',
      'minor': '#ffeb3b',
      'moderate': '#ff9800',
      'major': '#f44336',
      'critical': '#9c27b0'
    };
    return colors[significance] || '#9e9e9e';
  }

  /**
   * Get notification statistics.
   *
   * @returns {Object} - Counters plus current queue/cache sizes and `successRate` in percent
   */
  getStats() {
    const { totalNotifications, successfulNotifications } = this.stats;

    return {
      ...this.stats,
      queueLength: this.notificationQueue.length,
      throttleCacheSize: this.throttleCache.size,
      aggregationCacheSize: this.alertAggregation.size,
      successRate: totalNotifications > 0
        ? (successfulNotifications / totalNotifications) * 100
        : 0
    };
  }

  /**
   * Clear all caches and queues, cancelling pending aggregation flushes and
   * retries so nothing reappears in the queue afterwards.
   */
  clear() {
    this.notificationQueue.length = 0;
    this.throttleCache.clear();

    for (const aggregate of this.alertAggregation.values()) {
      clearTimeout(aggregate.timer);
    }
    this.alertAggregation.clear();

    for (const timer of this.retryQueue.values()) {
      clearTimeout(timer);
    }
    this.retryQueue.clear();
  }

  /**
   * Cleanup resources: stop the queue timer, drop all pending work and
   * remove every listener.
   */
  cleanup() {
    this.stopQueueProcessor();
    this.clear();
    this.removeAllListeners();
  }
}
600
+
601
+ export default AlertNotificationSystem;