crawlforge-mcp-server 3.0.17 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CLAUDE.md +2 -0
  2. package/README.md +1 -0
  3. package/package.json +6 -2
  4. package/server.js +192 -1277
  5. package/src/constants/config.js +2 -1
  6. package/src/core/ActionExecutor.js +2 -43
  7. package/src/core/AuthManager.js +230 -32
  8. package/src/core/BrowserContextPool.js +187 -0
  9. package/src/core/JobManager.js +7 -5
  10. package/src/core/LocalizationManager.js +14 -125
  11. package/src/core/ResearchOrchestrator.js +86 -5
  12. package/src/core/StealthBrowserManager.js +26 -18
  13. package/src/core/cache/CacheManager.js +4 -1
  14. package/src/core/crawlers/BFSCrawler.js +19 -5
  15. package/src/core/endpointGuard.js +37 -0
  16. package/src/observability/metrics.js +137 -0
  17. package/src/observability/tracing.js +74 -0
  18. package/src/server/auth/oauth.js +388 -0
  19. package/src/server/registerTool.js +41 -0
  20. package/src/server/schemas/common.js +29 -0
  21. package/src/server/transports/http.js +22 -0
  22. package/src/server/transports/stdio.js +16 -0
  23. package/src/server/transports/streamableHttp.js +226 -0
  24. package/src/server/withAuth.js +121 -0
  25. package/src/tools/advanced/BatchScrapeTool.js +12 -1086
  26. package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
  27. package/src/tools/advanced/batchScrape/index.js +328 -0
  28. package/src/tools/advanced/batchScrape/queue.js +91 -0
  29. package/src/tools/advanced/batchScrape/reporter.js +26 -0
  30. package/src/tools/advanced/batchScrape/schema.js +37 -0
  31. package/src/tools/advanced/batchScrape/worker.js +179 -0
  32. package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
  33. package/src/tools/basic/_fetch.js +35 -0
  34. package/src/tools/basic/extractLinks.js +74 -0
  35. package/src/tools/basic/extractMetadata.js +74 -0
  36. package/src/tools/basic/extractText.js +46 -0
  37. package/src/tools/basic/fetchUrl.js +44 -0
  38. package/src/tools/basic/scrapeStructured.js +58 -0
  39. package/src/tools/crawl/_sessionContext.js +234 -0
  40. package/src/tools/crawl/crawlDeep.js +55 -5
  41. package/src/tools/crawl/mapSite.js +23 -2
  42. package/src/tools/extract/_fetchAndParse.js +57 -0
  43. package/src/tools/extract/extractStructured.js +3 -19
  44. package/src/tools/extract/extractWithLlm.js +295 -0
  45. package/src/tools/research/deepResearch.js +33 -8
  46. package/src/tools/search/providers/searxng.js +126 -0
  47. package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
  48. package/src/tools/search/ranking/ResultRanker.js +17 -10
  49. package/src/tools/search/ranking/SearchResultCache.js +52 -0
  50. package/src/tools/search/searchWeb.js +112 -6
  51. package/src/tools/tracking/trackChanges/differ.js +98 -0
  52. package/src/tools/tracking/trackChanges/index.js +432 -0
  53. package/src/tools/tracking/trackChanges/monitor.js +93 -0
  54. package/src/tools/tracking/trackChanges/notifier.js +105 -0
  55. package/src/tools/tracking/trackChanges/schema.js +127 -0
  56. package/src/tools/tracking/trackChanges.js +12 -1374
@@ -1,1377 +1,15 @@
1
1
  /**
2
- * TrackChanges Tool - Change Tracking MCP Tool
3
- * Provides baseline capture, comparison, scheduled monitoring,
4
- * and change notification capabilities
2
+ * trackChanges.js backward-compatibility re-export shim.
3
+ *
4
+ * The implementation has been split into:
5
+ * trackChanges/schema.js — Zod input schema
6
+ * trackChanges/differ.js — fetch + history helpers
7
+ * trackChanges/monitor.js — polling monitor lifecycle
8
+ * trackChanges/notifier.js — webhook / email / Slack notifications
9
+ * trackChanges/index.js — TrackChangesTool class + singleton
10
+ *
11
+ * All original named exports are preserved here so existing imports continue to work.
5
12
  */
6
13
 
7
- import { z } from 'zod';
8
- import ChangeTracker from '../../core/ChangeTracker.js';
9
- import SnapshotManager from '../../core/SnapshotManager.js';
10
- import CacheManager from '../../core/cache/CacheManager.js';
11
- import { EventEmitter } from 'events';
12
-
13
- // Input validation schemas
14
- const TrackChangesSchema = z.object({
15
- url: z.string().url(),
16
- operation: z.enum([
17
- 'create_baseline',
18
- 'compare',
19
- 'monitor',
20
- 'get_history',
21
- 'get_stats',
22
- // Enhanced Phase 2.4 operations
23
- 'create_scheduled_monitor',
24
- 'stop_scheduled_monitor',
25
- 'get_dashboard',
26
- 'export_history',
27
- 'create_alert_rule',
28
- 'generate_trend_report',
29
- 'get_monitoring_templates'
30
- ]).default('compare'),
31
-
32
- // Content options
33
- content: z.string().optional(),
34
- html: z.string().optional(),
35
-
36
- // Tracking options
37
- trackingOptions: z.object({
38
- granularity: z.enum(['page', 'section', 'element', 'text']).default('section'),
39
- trackText: z.boolean().default(true),
40
- trackStructure: z.boolean().default(true),
41
- trackAttributes: z.boolean().default(false),
42
- trackImages: z.boolean().default(false),
43
- trackLinks: z.boolean().default(true),
44
- ignoreWhitespace: z.boolean().default(true),
45
- ignoreCase: z.boolean().default(false),
46
- customSelectors: z.array(z.string()).optional(),
47
- excludeSelectors: z.array(z.string()).optional().default([
48
- 'script', 'style', 'noscript', '.advertisement', '.ad', '#comments'
49
- ]),
50
- significanceThresholds: z.object({
51
- minor: z.number().min(0).max(1).default(0.1),
52
- moderate: z.number().min(0).max(1).default(0.3),
53
- major: z.number().min(0).max(1).default(0.7)
54
- }).optional()
55
- }).optional().default({}),
56
-
57
- // Monitoring options
58
- monitoringOptions: z.object({
59
- enabled: z.boolean().default(false),
60
- interval: z.number().min(60000).max(24 * 60 * 60 * 1000).default(300000), // 5 minutes to 24 hours
61
- maxRetries: z.number().min(0).max(5).default(3),
62
- retryDelay: z.number().min(1000).max(60000).default(5000),
63
- notificationThreshold: z.enum(['minor', 'moderate', 'major', 'critical']).default('moderate'),
64
- enableWebhook: z.boolean().default(false),
65
- webhookUrl: z.string().url().optional(),
66
- webhookSecret: z.string().optional()
67
- }).optional(),
68
-
69
- // Storage options
70
- storageOptions: z.object({
71
- enableSnapshots: z.boolean().default(true),
72
- retainHistory: z.boolean().default(true),
73
- maxHistoryEntries: z.number().min(1).max(1000).default(100),
74
- compressionEnabled: z.boolean().default(true),
75
- deltaStorageEnabled: z.boolean().default(true)
76
- }).optional().default({}),
77
-
78
- // Query options for history retrieval
79
- queryOptions: z.object({
80
- limit: z.number().min(1).max(500).default(50),
81
- offset: z.number().min(0).default(0),
82
- startTime: z.number().optional(),
83
- endTime: z.number().optional(),
84
- includeContent: z.boolean().default(false),
85
- significanceFilter: z.enum(['all', 'minor', 'moderate', 'major', 'critical']).optional()
86
- }).optional(),
87
-
88
- // Notification options
89
- notificationOptions: z.object({
90
- email: z.object({
91
- enabled: z.boolean().default(false),
92
- recipients: z.array(z.string().email()).optional(),
93
- subject: z.string().optional(),
94
- includeDetails: z.boolean().default(true)
95
- }).optional(),
96
- webhook: z.object({
97
- enabled: z.boolean().default(false),
98
- url: z.string().url().optional(),
99
- method: z.enum(['POST', 'PUT']).default('POST'),
100
- headers: z.record(z.string()).optional(),
101
- signingSecret: z.string().optional(),
102
- includeContent: z.boolean().default(false)
103
- }).optional(),
104
- slack: z.object({
105
- enabled: z.boolean().default(false),
106
- webhookUrl: z.string().url().optional(),
107
- channel: z.string().optional(),
108
- username: z.string().optional()
109
- }).optional()
110
- }).optional(),
111
-
112
- // Enhanced Phase 2.4 options
113
- scheduledMonitorOptions: z.object({
114
- schedule: z.string().optional(), // Cron expression
115
- templateId: z.string().optional(), // Monitoring template ID
116
- enabled: z.boolean().default(true)
117
- }).optional(),
118
-
119
- alertRuleOptions: z.object({
120
- ruleId: z.string().optional(),
121
- condition: z.string().optional(), // Condition description
122
- actions: z.array(z.enum(['webhook', 'email', 'slack'])).optional(),
123
- throttle: z.number().min(0).optional(),
124
- priority: z.enum(['low', 'medium', 'high']).optional()
125
- }).optional(),
126
-
127
- exportOptions: z.object({
128
- format: z.enum(['json', 'csv']).default('json'),
129
- startTime: z.number().optional(),
130
- endTime: z.number().optional(),
131
- includeContent: z.boolean().default(false),
132
- includeSnapshots: z.boolean().default(false)
133
- }).optional(),
134
-
135
- dashboardOptions: z.object({
136
- includeRecentAlerts: z.boolean().default(true),
137
- includeTrends: z.boolean().default(true),
138
- includeMonitorStatus: z.boolean().default(true)
139
- }).optional()
140
- });
141
-
142
- export class TrackChangesTool extends EventEmitter {
143
- constructor(options = {}) {
144
- super();
145
-
146
- this.options = {
147
- cacheEnabled: true,
148
- cacheTTL: 3600000, // 1 hour
149
- snapshotStorageDir: './snapshots',
150
- enableRealTimeMonitoring: true,
151
- maxConcurrentMonitors: 50,
152
- defaultPollingInterval: 300000, // 5 minutes
153
- ...options
154
- };
155
-
156
- // Initialize components
157
- this.changeTracker = new ChangeTracker({
158
- enableRealTimeTracking: this.options.enableRealTimeMonitoring,
159
- enableSemanticAnalysis: false, // Can be enabled if needed
160
- contentSimilarityThreshold: 0.8
161
- });
162
-
163
- this.snapshotManager = new SnapshotManager({
164
- storageDir: this.options.snapshotStorageDir,
165
- enableCompression: true,
166
- enableDeltaStorage: true,
167
- cacheEnabled: this.options.cacheEnabled
168
- });
169
-
170
- this.cache = this.options.cacheEnabled ?
171
- new CacheManager({ ttl: this.options.cacheTTL }) : null;
172
-
173
- // Active monitors
174
- this.activeMonitors = new Map();
175
- this.monitorStats = new Map();
176
-
177
- // Notification handlers
178
- this.notificationHandlers = {
179
- webhook: this.sendWebhookNotification.bind(this),
180
- email: this.sendEmailNotification.bind(this),
181
- slack: this.sendSlackNotification.bind(this)
182
- };
183
-
184
- this.initialize();
185
- }
186
-
187
- async initialize() {
188
- try {
189
- await this.snapshotManager.initialize();
190
-
191
- // Set up event handlers
192
- this.setupEventHandlers();
193
-
194
- this.emit('initialized');
195
- } catch (error) {
196
- this.emit('error', { operation: 'initialize', error: error.message });
197
- throw error;
198
- }
199
- }
200
-
201
- setupEventHandlers() {
202
- // Handle change tracker events
203
- this.changeTracker.on('changeDetected', (changeRecord) => {
204
- this.handleChangeDetected(changeRecord);
205
- });
206
-
207
- this.changeTracker.on('baselineCreated', (baseline) => {
208
- this.emit('baselineCreated', baseline);
209
- });
210
-
211
- // Handle snapshot manager events
212
- this.snapshotManager.on('snapshotStored', (snapshot) => {
213
- this.emit('snapshotStored', snapshot);
214
- });
215
-
216
- this.snapshotManager.on('error', (error) => {
217
- this.emit('error', error);
218
- });
219
- }
220
-
221
- /**
222
- * Execute the track changes tool
223
- * @param {Object} params - Tool parameters
224
- * @returns {Object} - Execution results
225
- */
226
- async execute(params) {
227
- try {
228
- const validated = TrackChangesSchema.parse(params);
229
- const { url, operation } = validated;
230
-
231
- switch (operation) {
232
- case 'create_baseline':
233
- return await this.createBaseline(validated);
234
-
235
- case 'compare':
236
- return await this.compareWithBaseline(validated);
237
-
238
- case 'monitor':
239
- return await this.setupMonitoring(validated);
240
-
241
- case 'get_history':
242
- return await this.getChangeHistory(validated);
243
-
244
- case 'get_stats':
245
- return await this.getStatistics(validated);
246
-
247
- // Enhanced Phase 2.4 operations
248
- case 'create_scheduled_monitor':
249
- return await this.createScheduledMonitor(validated);
250
-
251
- case 'stop_scheduled_monitor':
252
- return await this.stopScheduledMonitor(validated);
253
-
254
- case 'get_dashboard':
255
- return await this.getMonitoringDashboard(validated);
256
-
257
- case 'export_history':
258
- return await this.exportHistoricalData(validated);
259
-
260
- case 'create_alert_rule':
261
- return await this.createAlertRule(validated);
262
-
263
- case 'generate_trend_report':
264
- return await this.generateTrendReport(validated);
265
-
266
- case 'get_monitoring_templates':
267
- return await this.getMonitoringTemplates(validated);
268
-
269
- default:
270
- throw new Error(`Unknown operation: ${operation}`);
271
- }
272
-
273
- } catch (error) {
274
- return {
275
- success: false,
276
- error: error.message,
277
- timestamp: Date.now()
278
- };
279
- }
280
- }
281
-
282
- /**
283
- * Create a baseline for change tracking
284
- * @param {Object} params - Parameters
285
- * @returns {Object} - Baseline creation results
286
- */
287
- async createBaseline(params) {
288
- const { url, content, html, trackingOptions, storageOptions = {} } = params;
289
-
290
- // Apply defaults for storageOptions fields
291
- const enableSnapshots = storageOptions.enableSnapshots !== false; // Default to true
292
-
293
- try {
294
- // Fetch content if not provided
295
- let sourceContent = content || html;
296
- let fetchMetadata = {};
297
-
298
- if (!sourceContent) {
299
- const fetchResult = await this.fetchContent(url);
300
- if (!fetchResult || !fetchResult.content) {
301
- throw new Error('Failed to fetch content from URL');
302
- }
303
- sourceContent = fetchResult.content;
304
- fetchMetadata = fetchResult.metadata || {};
305
- }
306
-
307
- // Validate sourceContent
308
- if (!sourceContent || typeof sourceContent !== 'string') {
309
- throw new Error('Invalid content: content must be a non-empty string');
310
- }
311
-
312
- // Create baseline with change tracker
313
- const baseline = await this.changeTracker.createBaseline(
314
- url,
315
- sourceContent,
316
- trackingOptions
317
- );
318
-
319
- // Store snapshot if enabled (defaults to true)
320
- let snapshotInfo = null;
321
- if (enableSnapshots) {
322
- const snapshotResult = await this.snapshotManager.storeSnapshot(
323
- url,
324
- sourceContent,
325
- {
326
- ...fetchMetadata,
327
- baseline: true,
328
- trackingOptions
329
- }
330
- );
331
- snapshotInfo = snapshotResult;
332
- }
333
-
334
- return {
335
- success: true,
336
- operation: 'create_baseline',
337
- url,
338
- baseline: {
339
- version: baseline.version,
340
- contentHash: baseline.analysis?.hashes?.page,
341
- sections: Object.keys(baseline.analysis?.hashes?.sections || {}).length,
342
- elements: Object.keys(baseline.analysis?.hashes?.elements || {}).length,
343
- createdAt: baseline.timestamp,
344
- options: trackingOptions
345
- },
346
- snapshot: snapshotInfo,
347
- timestamp: Date.now()
348
- };
349
-
350
- } catch (error) {
351
- throw new Error(`Failed to create baseline: ${error.message}`);
352
- }
353
- }
354
-
355
- /**
356
- * Compare current content with baseline
357
- * @param {Object} params - Parameters
358
- * @returns {Object} - Comparison results
359
- */
360
- async compareWithBaseline(params) {
361
- const { url, content, html, trackingOptions, storageOptions = {}, notificationOptions } = params;
362
-
363
- // Apply defaults for storageOptions fields
364
- const enableSnapshots = storageOptions.enableSnapshots !== false; // Default to true
365
-
366
- try {
367
- // Fetch current content if not provided
368
- let currentContent = content || html;
369
- let fetchMetadata = {};
370
-
371
- if (!currentContent) {
372
- const fetchResult = await this.fetchContent(url);
373
- if (!fetchResult || !fetchResult.content) {
374
- throw new Error('Failed to fetch content from URL');
375
- }
376
- currentContent = fetchResult.content;
377
- fetchMetadata = fetchResult.metadata || {};
378
- }
379
-
380
- // Validate currentContent
381
- if (!currentContent || typeof currentContent !== 'string') {
382
- throw new Error('Invalid content: content must be a non-empty string');
383
- }
384
-
385
- // Perform comparison
386
- const comparisonResult = await this.changeTracker.compareWithBaseline(
387
- url,
388
- currentContent,
389
- trackingOptions
390
- );
391
-
392
- // Store snapshot if changes detected and storage enabled (defaults to true)
393
- let snapshotInfo = null;
394
- if (comparisonResult.hasChanges && enableSnapshots) {
395
- const snapshotResult = await this.snapshotManager.storeSnapshot(
396
- url,
397
- currentContent,
398
- {
399
- ...fetchMetadata,
400
- changes: comparisonResult.summary,
401
- significance: comparisonResult.significance
402
- }
403
- );
404
- snapshotInfo = snapshotResult;
405
- }
406
-
407
- // Send notifications if significant changes detected
408
- if (comparisonResult.hasChanges && notificationOptions) {
409
- await this.sendNotifications(url, comparisonResult, notificationOptions);
410
- }
411
-
412
- return {
413
- success: true,
414
- operation: 'compare',
415
- url,
416
- hasChanges: comparisonResult.hasChanges,
417
- significance: comparisonResult.significance,
418
- changeType: comparisonResult.changeType,
419
- summary: comparisonResult.summary,
420
- details: comparisonResult.details,
421
- metrics: comparisonResult.metrics,
422
- recommendations: comparisonResult.recommendations,
423
- snapshot: snapshotInfo,
424
- timestamp: Date.now()
425
- };
426
-
427
- } catch (error) {
428
- throw new Error(`Failed to compare with baseline: ${error.message}`);
429
- }
430
- }
431
-
432
- /**
433
- * Set up continuous monitoring for a URL
434
- * @param {Object} params - Parameters
435
- * @returns {Object} - Monitoring setup results
436
- */
437
- async setupMonitoring(params) {
438
- const { url, monitoringOptions, trackingOptions, storageOptions, notificationOptions } = params;
439
-
440
- try {
441
- // Check if already monitoring this URL
442
- if (this.activeMonitors.has(url)) {
443
- const existing = this.activeMonitors.get(url);
444
- clearInterval(existing.timer);
445
- }
446
-
447
- // Create monitoring configuration
448
- const monitorConfig = {
449
- url,
450
- options: {
451
- ...monitoringOptions,
452
- trackingOptions,
453
- storageOptions,
454
- notificationOptions
455
- },
456
- stats: {
457
- started: Date.now(),
458
- checks: 0,
459
- changesDetected: 0,
460
- errors: 0,
461
- lastCheck: null,
462
- lastChange: null,
463
- averageResponseTime: 0
464
- }
465
- };
466
-
467
- // Set up polling timer
468
- const timer = setInterval(async () => {
469
- await this.performMonitoringCheck(url, monitorConfig);
470
- }, monitoringOptions.interval);
471
-
472
- monitorConfig.timer = timer;
473
-
474
- // Store active monitor
475
- this.activeMonitors.set(url, monitorConfig);
476
- this.monitorStats.set(url, monitorConfig.stats);
477
-
478
- // Perform initial check
479
- await this.performMonitoringCheck(url, monitorConfig);
480
-
481
- return {
482
- success: true,
483
- operation: 'monitor',
484
- url,
485
- monitoring: {
486
- enabled: true,
487
- interval: monitoringOptions.interval,
488
- notificationThreshold: monitoringOptions.notificationThreshold,
489
- startedAt: monitorConfig.stats.started
490
- },
491
- timestamp: Date.now()
492
- };
493
-
494
- } catch (error) {
495
- throw new Error(`Failed to setup monitoring: ${error.message}`);
496
- }
497
- }
498
-
499
- /**
500
- * Get change history for a URL
501
- * @param {Object} params - Parameters
502
- * @returns {Object} - Change history
503
- */
504
- async getChangeHistory(params) {
505
- const { url, queryOptions } = params;
506
-
507
- try {
508
- // Get change history from change tracker
509
- const changeHistory = this.changeTracker.getChangeHistory(url, queryOptions.limit);
510
-
511
- // Get snapshot history from snapshot manager
512
- const snapshotHistory = await this.snapshotManager.getChangeHistory(url, queryOptions);
513
-
514
- // Merge and enrich history data
515
- const combinedHistory = this.mergeHistoryData(changeHistory, snapshotHistory.history);
516
-
517
- // Apply filters
518
- let filteredHistory = combinedHistory;
519
- if (queryOptions.significanceFilter && queryOptions.significanceFilter !== 'all') {
520
- filteredHistory = combinedHistory.filter(entry =>
521
- this.matchesSignificanceFilter(entry, queryOptions.significanceFilter)
522
- );
523
- }
524
-
525
- // Apply pagination
526
- const start = queryOptions.offset || 0;
527
- const end = start + (queryOptions.limit || 50);
528
- const paginatedHistory = filteredHistory.slice(start, end);
529
-
530
- return {
531
- success: true,
532
- operation: 'get_history',
533
- url,
534
- history: paginatedHistory,
535
- pagination: {
536
- total: filteredHistory.length,
537
- limit: queryOptions.limit,
538
- offset: queryOptions.offset,
539
- hasMore: end < filteredHistory.length
540
- },
541
- timespan: {
542
- earliest: combinedHistory.length > 0 ?
543
- combinedHistory[combinedHistory.length - 1].timestamp : null,
544
- latest: combinedHistory.length > 0 ?
545
- combinedHistory[0].timestamp : null,
546
- totalEntries: combinedHistory.length
547
- },
548
- timestamp: Date.now()
549
- };
550
-
551
- } catch (error) {
552
- throw new Error(`Failed to get change history: ${error.message}`);
553
- }
554
- }
555
-
556
- /**
557
- * Get statistics for change tracking
558
- * @param {Object} params - Parameters
559
- * @returns {Object} - Statistics
560
- */
561
- async getStatistics(params) {
562
- const { url } = params;
563
-
564
- try {
565
- // Get change tracker stats
566
- const changeTrackerStats = this.changeTracker.getStats();
567
-
568
- // Get snapshot manager stats
569
- const snapshotManagerStats = this.snapshotManager.getStats();
570
-
571
- // Get monitoring stats
572
- const monitoringStats = url ?
573
- this.monitorStats.get(url) :
574
- this.getAggregatedMonitoringStats();
575
-
576
- // Get URL-specific stats if URL provided
577
- let urlStats = null;
578
- if (url) {
579
- urlStats = await this.getUrlSpecificStats(url);
580
- }
581
-
582
- return {
583
- success: true,
584
- operation: 'get_stats',
585
- url: url || 'global',
586
- stats: {
587
- changeTracking: changeTrackerStats,
588
- snapshotStorage: snapshotManagerStats,
589
- monitoring: monitoringStats,
590
- urlSpecific: urlStats,
591
- system: {
592
- activeMonitors: this.activeMonitors.size,
593
- cacheEnabled: !!this.cache,
594
- cacheStats: this.cache ? this.cache.getStats() : null
595
- }
596
- },
597
- timestamp: Date.now()
598
- };
599
-
600
- } catch (error) {
601
- throw new Error(`Failed to get statistics: ${error.message}`);
602
- }
603
- }
604
-
605
- // Helper methods
606
-
607
- async fetchContent(url) {
608
- try {
609
- const response = await fetch(url, {
610
- headers: {
611
- 'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0',
612
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
613
- 'Accept-Language': 'en-US,en;q=0.5',
614
- 'Accept-Encoding': 'gzip, deflate',
615
- 'Cache-Control': 'no-cache'
616
- },
617
- timeout: 30000
618
- });
619
-
620
- if (!response.ok) {
621
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
622
- }
623
-
624
- const content = await response.text();
625
-
626
- return {
627
- content,
628
- metadata: {
629
- statusCode: response.status,
630
- contentType: response.headers.get('content-type'),
631
- contentLength: content.length,
632
- lastModified: response.headers.get('last-modified'),
633
- etag: response.headers.get('etag'),
634
- fetchedAt: Date.now()
635
- }
636
- };
637
-
638
- } catch (error) {
639
- throw new Error(`Failed to fetch content: ${error.message}`);
640
- }
641
- }
642
-
643
- async performMonitoringCheck(url, monitorConfig) {
644
- const startTime = Date.now();
645
-
646
- try {
647
- monitorConfig.stats.checks++;
648
-
649
- // Fetch current content
650
- const fetchResult = await this.fetchContent(url);
651
-
652
- // Perform comparison
653
- const comparisonResult = await this.changeTracker.compareWithBaseline(
654
- url,
655
- fetchResult.content,
656
- monitorConfig.options.trackingOptions
657
- );
658
-
659
- // Update stats
660
- const responseTime = Date.now() - startTime;
661
- monitorConfig.stats.averageResponseTime =
662
- (monitorConfig.stats.averageResponseTime * (monitorConfig.stats.checks - 1) + responseTime) /
663
- monitorConfig.stats.checks;
664
-
665
- monitorConfig.stats.lastCheck = Date.now();
666
-
667
- // Handle changes if detected
668
- if (comparisonResult.hasChanges) {
669
- monitorConfig.stats.changesDetected++;
670
- monitorConfig.stats.lastChange = Date.now();
671
-
672
- // Check if change meets notification threshold
673
- if (this.meetsNotificationThreshold(
674
- comparisonResult.significance,
675
- monitorConfig.options.notificationThreshold
676
- )) {
677
- // Store snapshot if enabled
678
- if (monitorConfig.options.storageOptions?.enableSnapshots) {
679
- await this.snapshotManager.storeSnapshot(
680
- url,
681
- fetchResult.content,
682
- {
683
- ...fetchResult.metadata,
684
- changes: comparisonResult.summary,
685
- significance: comparisonResult.significance,
686
- monitoring: true
687
- }
688
- );
689
- }
690
-
691
- // Send notifications
692
- if (monitorConfig.options.notificationOptions) {
693
- await this.sendNotifications(
694
- url,
695
- comparisonResult,
696
- monitorConfig.options.notificationOptions
697
- );
698
- }
699
- }
700
- }
701
-
702
- this.emit('monitoringCheck', {
703
- url,
704
- hasChanges: comparisonResult.hasChanges,
705
- significance: comparisonResult.significance,
706
- responseTime,
707
- timestamp: Date.now()
708
- });
709
-
710
- } catch (error) {
711
- monitorConfig.stats.errors++;
712
-
713
- this.emit('monitoringError', {
714
- url,
715
- error: error.message,
716
- timestamp: Date.now()
717
- });
718
-
719
- // If too many errors, disable monitoring
720
- if (monitorConfig.stats.errors > monitorConfig.options.maxRetries) {
721
- this.stopMonitoring(url);
722
-
723
- this.emit('monitoringDisabled', {
724
- url,
725
- reason: 'Too many errors',
726
- totalErrors: monitorConfig.stats.errors
727
- });
728
- }
729
- }
730
- }
731
-
732
- async sendNotifications(url, changeResult, notificationOptions) {
733
- const notifications = [];
734
-
735
- if (notificationOptions.webhook?.enabled) {
736
- notifications.push(this.sendWebhookNotification(url, changeResult, notificationOptions.webhook));
737
- }
738
-
739
- if (notificationOptions.email?.enabled) {
740
- notifications.push(this.sendEmailNotification(url, changeResult, notificationOptions.email));
741
- }
742
-
743
- if (notificationOptions.slack?.enabled) {
744
- notifications.push(this.sendSlackNotification(url, changeResult, notificationOptions.slack));
745
- }
746
-
747
- await Promise.allSettled(notifications);
748
- }
749
-
750
- async sendWebhookNotification(url, changeResult, webhookConfig) {
751
- try {
752
- const payload = {
753
- event: 'change_detected',
754
- url,
755
- timestamp: Date.now(),
756
- significance: changeResult.significance,
757
- changeType: changeResult.changeType,
758
- summary: changeResult.summary,
759
- details: webhookConfig.includeContent ? changeResult.details : undefined
760
- };
761
-
762
- const response = await fetch(webhookConfig.url, {
763
- method: webhookConfig.method || 'POST',
764
- headers: {
765
- 'Content-Type': 'application/json',
766
- 'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0',
767
- ...webhookConfig.headers
768
- },
769
- body: JSON.stringify(payload)
770
- });
771
-
772
- if (!response.ok) {
773
- throw new Error(`Webhook failed: ${response.status} ${response.statusText}`);
774
- }
775
-
776
- this.emit('notificationSent', {
777
- type: 'webhook',
778
- url,
779
- success: true
780
- });
781
-
782
- } catch (error) {
783
- this.emit('notificationError', {
784
- type: 'webhook',
785
- url,
786
- error: error.message
787
- });
788
- }
789
- }
790
-
791
- async sendEmailNotification(url, changeResult, emailConfig) {
792
- // Email notification implementation would go here
793
- // This would integrate with email service providers
794
- this.emit('notificationSent', {
795
- type: 'email',
796
- url,
797
- success: true,
798
- note: 'Email notifications require external service integration'
799
- });
800
- }
801
-
802
- async sendSlackNotification(url, changeResult, slackConfig) {
803
- try {
804
- const payload = {
805
- text: `🔄 Content Change Detected`,
806
- attachments: [{
807
- color: this.getSlackColor(changeResult.significance),
808
- fields: [
809
- {
810
- title: 'URL',
811
- value: url,
812
- short: false
813
- },
814
- {
815
- title: 'Significance',
816
- value: changeResult.significance.toUpperCase(),
817
- short: true
818
- },
819
- {
820
- title: 'Change Type',
821
- value: changeResult.changeType.replace('_', ' '),
822
- short: true
823
- },
824
- {
825
- title: 'Summary',
826
- value: changeResult.summary.changeDescription,
827
- short: false
828
- }
829
- ],
830
- timestamp: Math.floor(Date.now() / 1000)
831
- }],
832
- channel: slackConfig.channel,
833
- username: slackConfig.username || 'Change Tracker'
834
- };
835
-
836
- const response = await fetch(slackConfig.webhookUrl, {
837
- method: 'POST',
838
- headers: {
839
- 'Content-Type': 'application/json'
840
- },
841
- body: JSON.stringify(payload)
842
- });
843
-
844
- if (!response.ok) {
845
- throw new Error(`Slack notification failed: ${response.status}`);
846
- }
847
-
848
- this.emit('notificationSent', {
849
- type: 'slack',
850
- url,
851
- success: true
852
- });
853
-
854
- } catch (error) {
855
- this.emit('notificationError', {
856
- type: 'slack',
857
- url,
858
- error: error.message
859
- });
860
- }
861
- }
862
-
863
- mergeHistoryData(changeHistory, snapshotHistory) {
864
- // Merge change tracker history with snapshot history
865
- const merged = [];
866
-
867
- // Add change history entries
868
- changeHistory.forEach(entry => {
869
- merged.push({
870
- ...entry,
871
- source: 'change_tracker',
872
- hasSnapshot: false
873
- });
874
- });
875
-
876
- // Add snapshot history entries
877
- snapshotHistory.forEach(entry => {
878
- const existing = merged.find(m =>
879
- Math.abs(m.timestamp - entry.timestamp) < 60000 // Within 1 minute
880
- );
881
-
882
- if (existing) {
883
- existing.hasSnapshot = true;
884
- existing.snapshotId = entry.snapshotId;
885
- } else {
886
- merged.push({
887
- ...entry,
888
- source: 'snapshot',
889
- hasSnapshot: true
890
- });
891
- }
892
- });
893
-
894
- // Sort by timestamp (newest first)
895
- return merged.sort((a, b) => b.timestamp - a.timestamp);
896
- }
897
-
898
- matchesSignificanceFilter(entry, filter) {
899
- const significanceLevels = ['none', 'minor', 'moderate', 'major', 'critical'];
900
- const entryLevel = significanceLevels.indexOf(entry.significance || 'none');
901
- const filterLevel = significanceLevels.indexOf(filter);
902
-
903
- return entryLevel >= filterLevel;
904
- }
905
-
906
- meetsNotificationThreshold(significance, threshold) {
907
- const levels = ['none', 'minor', 'moderate', 'major', 'critical'];
908
- const significanceLevel = levels.indexOf(significance);
909
- const thresholdLevel = levels.indexOf(threshold);
910
-
911
- return significanceLevel >= thresholdLevel;
912
- }
913
-
914
- getSlackColor(significance) {
915
- const colors = {
916
- 'none': '#36a64f',
917
- 'minor': '#ffeb3b',
918
- 'moderate': '#ff9800',
919
- 'major': '#f44336',
920
- 'critical': '#9c27b0'
921
- };
922
-
923
- return colors[significance] || '#36a64f';
924
- }
925
-
926
- async getUrlSpecificStats(url) {
927
- try {
928
- const changeHistory = this.changeTracker.getChangeHistory(url, 100);
929
- const snapshotHistory = await this.snapshotManager.querySnapshots({
930
- url,
931
- limit: 100,
932
- includeContent: false
933
- });
934
-
935
- return {
936
- totalChanges: changeHistory.length,
937
- totalSnapshots: snapshotHistory.snapshots.length,
938
- lastChange: changeHistory.length > 0 ? changeHistory[0].timestamp : null,
939
- averageChangeInterval: this.calculateAverageInterval(changeHistory),
940
- significanceDistribution: this.calculateSignificanceDistribution(changeHistory),
941
- isBeingMonitored: this.activeMonitors.has(url)
942
- };
943
-
944
- } catch (error) {
945
- return {
946
- error: error.message
947
- };
948
- }
949
- }
950
-
951
- getAggregatedMonitoringStats() {
952
- const stats = {
953
- totalMonitors: this.activeMonitors.size,
954
- totalChecks: 0,
955
- totalChanges: 0,
956
- totalErrors: 0,
957
- averageResponseTime: 0,
958
- oldestMonitor: null,
959
- newestMonitor: null
960
- };
961
-
962
- const allStats = Array.from(this.monitorStats.values());
963
-
964
- if (allStats.length === 0) return stats;
965
-
966
- stats.totalChecks = allStats.reduce((sum, s) => sum + s.checks, 0);
967
- stats.totalChanges = allStats.reduce((sum, s) => sum + s.changesDetected, 0);
968
- stats.totalErrors = allStats.reduce((sum, s) => sum + s.errors, 0);
969
- stats.averageResponseTime = allStats.reduce((sum, s) => sum + s.averageResponseTime, 0) / allStats.length;
970
- stats.oldestMonitor = Math.min(...allStats.map(s => s.started));
971
- stats.newestMonitor = Math.max(...allStats.map(s => s.started));
972
-
973
- return stats;
974
- }
975
-
976
- calculateAverageInterval(changeHistory) {
977
- if (changeHistory.length < 2) return null;
978
-
979
- let totalInterval = 0;
980
- for (let i = 1; i < changeHistory.length; i++) {
981
- totalInterval += changeHistory[i - 1].timestamp - changeHistory[i].timestamp;
982
- }
983
-
984
- return totalInterval / (changeHistory.length - 1);
985
- }
986
-
987
- calculateSignificanceDistribution(changeHistory) {
988
- const distribution = {
989
- none: 0,
990
- minor: 0,
991
- moderate: 0,
992
- major: 0,
993
- critical: 0
994
- };
995
-
996
- changeHistory.forEach(entry => {
997
- const significance = entry.significance || 'none';
998
- if (distribution.hasOwnProperty(significance)) {
999
- distribution[significance]++;
1000
- }
1001
- });
1002
-
1003
- return distribution;
1004
- }
1005
-
1006
- async handleChangeDetected(changeRecord) {
1007
- // Store snapshot if significant change
1008
- if (changeRecord.significance !== 'none') {
1009
- try {
1010
- await this.snapshotManager.storeSnapshot(
1011
- changeRecord.url,
1012
- changeRecord.details.current || '',
1013
- {
1014
- changes: changeRecord.details,
1015
- significance: changeRecord.significance,
1016
- changeType: changeRecord.changeType
1017
- }
1018
- );
1019
- } catch (error) {
1020
- this.emit('error', {
1021
- operation: 'storeChangeSnapshot',
1022
- url: changeRecord.url,
1023
- error: error.message
1024
- });
1025
- }
1026
- }
1027
- }
1028
-
1029
- // Public API methods
1030
-
1031
- stopMonitoring(url) {
1032
- if (this.activeMonitors.has(url)) {
1033
- const monitor = this.activeMonitors.get(url);
1034
- clearInterval(monitor.timer);
1035
- this.activeMonitors.delete(url);
1036
-
1037
- this.emit('monitoringStopped', { url });
1038
- return true;
1039
- }
1040
- return false;
1041
- }
1042
-
1043
- stopAllMonitoring() {
1044
- const urls = Array.from(this.activeMonitors.keys());
1045
- urls.forEach(url => this.stopMonitoring(url));
1046
-
1047
- this.emit('allMonitoringStopped', { count: urls.length });
1048
- return urls.length;
1049
- }
1050
-
1051
- getActiveMonitors() {
1052
- return Array.from(this.activeMonitors.keys()).map(url => ({
1053
- url,
1054
- config: this.activeMonitors.get(url).options,
1055
- stats: this.monitorStats.get(url)
1056
- }));
1057
- }
1058
-
1059
- /**
1060
- * Create scheduled monitor using enhanced features
1061
- * @param {Object} params - Parameters
1062
- * @returns {Object} - Scheduled monitor results
1063
- */
1064
- async createScheduledMonitor(params) {
1065
- const { url, scheduledMonitorOptions, trackingOptions, notificationOptions } = params;
1066
-
1067
- try {
1068
- const schedule = scheduledMonitorOptions?.schedule || '0 */1 * * *'; // Hourly default
1069
- const templateId = scheduledMonitorOptions?.templateId;
1070
-
1071
- // Apply template if specified
1072
- let monitorOptions = { ...trackingOptions };
1073
- if (templateId && this.changeTracker.monitoringTemplates.has(templateId)) {
1074
- const template = this.changeTracker.monitoringTemplates.get(templateId);
1075
- monitorOptions = { ...template.options, ...monitorOptions };
1076
- }
1077
-
1078
- // Create scheduled monitor
1079
- const result = await this.changeTracker.createScheduledMonitor(
1080
- url,
1081
- schedule,
1082
- {
1083
- ...monitorOptions,
1084
- alertRules: {
1085
- threshold: 'moderate',
1086
- methods: ['webhook'],
1087
- throttle: 600000,
1088
- ...notificationOptions
1089
- }
1090
- }
1091
- );
1092
-
1093
- return {
1094
- success: true,
1095
- operation: 'create_scheduled_monitor',
1096
- url,
1097
- monitor: result,
1098
- template: templateId ? this.changeTracker.monitoringTemplates.get(templateId)?.name : null,
1099
- timestamp: Date.now()
1100
- };
1101
-
1102
- } catch (error) {
1103
- throw new Error(`Failed to create scheduled monitor: ${error.message}`);
1104
- }
1105
- }
1106
-
1107
- /**
1108
- * Stop scheduled monitor
1109
- * @param {Object} params - Parameters
1110
- * @returns {Object} - Stop results
1111
- */
1112
- async stopScheduledMonitor(params) {
1113
- const { url } = params;
1114
-
1115
- try {
1116
- // Find and stop the scheduled monitor for this URL
1117
- let stoppedMonitors = 0;
1118
-
1119
- for (const [id, monitor] of this.changeTracker.scheduledMonitors.entries()) {
1120
- if (monitor.url === url) {
1121
- if (monitor.cronJob) {
1122
- monitor.cronJob.destroy();
1123
- }
1124
- monitor.status = 'stopped';
1125
- this.changeTracker.scheduledMonitors.delete(id);
1126
- stoppedMonitors++;
1127
- }
1128
- }
1129
-
1130
- return {
1131
- success: true,
1132
- operation: 'stop_scheduled_monitor',
1133
- url,
1134
- stoppedMonitors,
1135
- timestamp: Date.now()
1136
- };
1137
-
1138
- } catch (error) {
1139
- throw new Error(`Failed to stop scheduled monitor: ${error.message}`);
1140
- }
1141
- }
1142
-
1143
- /**
1144
- * Get monitoring dashboard
1145
- * @param {Object} params - Parameters
1146
- * @returns {Object} - Dashboard data
1147
- */
1148
- async getMonitoringDashboard(params) {
1149
- const { dashboardOptions } = params;
1150
-
1151
- try {
1152
- const dashboard = this.changeTracker.getMonitoringDashboard();
1153
-
1154
- // Filter based on options
1155
- if (!dashboardOptions?.includeRecentAlerts) {
1156
- delete dashboard.recentAlerts;
1157
- }
1158
-
1159
- if (!dashboardOptions?.includeTrends) {
1160
- delete dashboard.trends;
1161
- }
1162
-
1163
- if (!dashboardOptions?.includeMonitorStatus) {
1164
- dashboard.monitors = dashboard.monitors.map(m => ({
1165
- id: m.id,
1166
- url: m.url,
1167
- status: m.status
1168
- }));
1169
- }
1170
-
1171
- return {
1172
- success: true,
1173
- operation: 'get_dashboard',
1174
- dashboard,
1175
- timestamp: Date.now()
1176
- };
1177
-
1178
- } catch (error) {
1179
- throw new Error(`Failed to get monitoring dashboard: ${error.message}`);
1180
- }
1181
- }
1182
-
1183
- /**
1184
- * Export historical data
1185
- * @param {Object} params - Parameters
1186
- * @returns {Object} - Exported data
1187
- */
1188
- async exportHistoricalData(params) {
1189
- const { url, exportOptions } = params;
1190
-
1191
- try {
1192
- const exportData = await this.changeTracker.exportHistoricalData({
1193
- ...exportOptions,
1194
- url
1195
- });
1196
-
1197
- return {
1198
- success: true,
1199
- operation: 'export_history',
1200
- url: url || 'global',
1201
- export: exportData,
1202
- timestamp: Date.now()
1203
- };
1204
-
1205
- } catch (error) {
1206
- throw new Error(`Failed to export historical data: ${error.message}`);
1207
- }
1208
- }
1209
-
1210
- /**
1211
- * Create custom alert rule
1212
- * @param {Object} params - Parameters
1213
- * @returns {Object} - Alert rule results
1214
- */
1215
- async createAlertRule(params) {
1216
- const { alertRuleOptions } = params;
1217
-
1218
- try {
1219
- const ruleId = alertRuleOptions?.ruleId ||
1220
- `custom_rule_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
1221
-
1222
- const rule = {
1223
- condition: this.parseCondition(alertRuleOptions?.condition || 'significance === "major"'),
1224
- actions: alertRuleOptions?.actions || ['webhook'],
1225
- throttle: alertRuleOptions?.throttle || 600000,
1226
- priority: alertRuleOptions?.priority || 'medium'
1227
- };
1228
-
1229
- // Store the alert rule
1230
- this.changeTracker.alertRules.set(ruleId, rule);
1231
-
1232
- return {
1233
- success: true,
1234
- operation: 'create_alert_rule',
1235
- ruleId,
1236
- rule,
1237
- timestamp: Date.now()
1238
- };
1239
-
1240
- } catch (error) {
1241
- throw new Error(`Failed to create alert rule: ${error.message}`);
1242
- }
1243
- }
1244
-
1245
- /**
1246
- * Generate trend analysis report
1247
- * @param {Object} params - Parameters
1248
- * @returns {Object} - Trend report
1249
- */
1250
- async generateTrendReport(params) {
1251
- const { url } = params;
1252
-
1253
- try {
1254
- const report = await this.changeTracker.generateTrendAnalysisReport(url);
1255
-
1256
- return {
1257
- success: true,
1258
- operation: 'generate_trend_report',
1259
- report,
1260
- timestamp: Date.now()
1261
- };
1262
-
1263
- } catch (error) {
1264
- throw new Error(`Failed to generate trend report: ${error.message}`);
1265
- }
1266
- }
1267
-
1268
- /**
1269
- * Get available monitoring templates
1270
- * @param {Object} params - Parameters
1271
- * @returns {Object} - Templates list
1272
- */
1273
- async getMonitoringTemplates(params) {
1274
- try {
1275
- const templates = {};
1276
-
1277
- for (const [id, template] of this.changeTracker.monitoringTemplates.entries()) {
1278
- templates[id] = {
1279
- name: template.name,
1280
- frequency: template.frequency,
1281
- description: this.generateTemplateDescription(template),
1282
- options: template.options,
1283
- alertRules: template.alertRules
1284
- };
1285
- }
1286
-
1287
- return {
1288
- success: true,
1289
- operation: 'get_monitoring_templates',
1290
- templates,
1291
- count: Object.keys(templates).length,
1292
- timestamp: Date.now()
1293
- };
1294
-
1295
- } catch (error) {
1296
- throw new Error(`Failed to get monitoring templates: ${error.message}`);
1297
- }
1298
- }
1299
-
1300
- // Helper methods for enhanced features
1301
-
1302
- parseCondition(conditionString) {
1303
- // Simple condition parser - in production would use a proper parser
1304
- return (changeResult, history) => {
1305
- try {
1306
- // Basic condition evaluation
1307
- if (conditionString.includes('significance')) {
1308
- const match = conditionString.match(/significance\s*===\s*["'](\w+)["']/);
1309
- if (match) {
1310
- return changeResult.significance === match[1];
1311
- }
1312
- }
1313
-
1314
- if (conditionString.includes('frequent')) {
1315
- const recent = history.filter(h => Date.now() - h.timestamp < 3600000);
1316
- return recent.length > 3;
1317
- }
1318
-
1319
- return false;
1320
- } catch (error) {
1321
- return false;
1322
- }
1323
- };
1324
- }
1325
-
1326
- generateTemplateDescription(template) {
1327
- const descriptions = {
1328
- 'news-site': 'Optimized for news websites with frequent content updates',
1329
- 'e-commerce': 'Tracks product pages, prices, and inventory changes',
1330
- 'documentation': 'Monitors documentation sites with less frequent but important changes'
1331
- };
1332
-
1333
- return descriptions[template.name] || 'Custom monitoring template';
1334
- }
1335
-
1336
- async shutdown() {
1337
- this.stopAllMonitoring();
1338
- await this.snapshotManager.shutdown();
1339
- await this.changeTracker.cleanup();
1340
- this.emit('shutdown');
1341
- }
1342
- }
1343
-
1344
- export default TrackChangesTool;
1345
- // Create and export tool instance for MCP compatibility
1346
- export const trackChangesTool = new TrackChangesTool();
1347
-
1348
- // Add name property for MCP protocol compliance
1349
- trackChangesTool.name = 'track_changes';
1350
-
1351
- // Add validateParameters method for MCP protocol compliance
1352
- trackChangesTool.validateParameters = function(params) {
1353
- return TrackChangesSchema.parse(params);
1354
- };
1355
-
1356
- // Add description property for MCP protocol compliance
1357
- trackChangesTool.description = 'Track and analyze content changes with baseline capture, comparison, and monitoring capabilities';
1358
-
1359
- // Add inputSchema property for MCP protocol compliance
1360
- trackChangesTool.inputSchema = {
1361
- type: 'object',
1362
- properties: {
1363
- url: {
1364
- type: 'string',
1365
- description: 'URL to track for changes'
1366
- },
1367
- operation: {
1368
- type: 'string',
1369
- description: 'Operation to perform: create_baseline, compare, monitor, get_history, get_stats'
1370
- },
1371
- content: {
1372
- type: 'string',
1373
- description: 'Content to analyze or compare'
1374
- }
1375
- },
1376
- required: ['url']
1377
- };
14
+ export { TrackChangesTool, TrackChangesTool as default, trackChangesTool } from './trackChanges/index.js';
15
+ export { TrackChangesSchema } from './trackChanges/schema.js';