crawlforge-mcp-server 3.0.18 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/package.json +5 -2
  2. package/server.js +192 -1277
  3. package/src/core/ActionExecutor.js +2 -43
  4. package/src/core/AuthManager.js +127 -14
  5. package/src/core/BrowserContextPool.js +187 -0
  6. package/src/core/JobManager.js +7 -5
  7. package/src/core/LocalizationManager.js +14 -125
  8. package/src/core/StealthBrowserManager.js +26 -18
  9. package/src/core/cache/CacheManager.js +4 -1
  10. package/src/core/crawlers/BFSCrawler.js +19 -5
  11. package/src/observability/metrics.js +137 -0
  12. package/src/observability/tracing.js +74 -0
  13. package/src/server/auth/oauth.js +388 -0
  14. package/src/server/registerTool.js +41 -0
  15. package/src/server/schemas/common.js +29 -0
  16. package/src/server/transports/http.js +22 -0
  17. package/src/server/transports/stdio.js +16 -0
  18. package/src/server/transports/streamableHttp.js +226 -0
  19. package/src/server/withAuth.js +121 -0
  20. package/src/tools/advanced/BatchScrapeTool.js +12 -1086
  21. package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
  22. package/src/tools/advanced/batchScrape/index.js +328 -0
  23. package/src/tools/advanced/batchScrape/queue.js +91 -0
  24. package/src/tools/advanced/batchScrape/reporter.js +26 -0
  25. package/src/tools/advanced/batchScrape/schema.js +37 -0
  26. package/src/tools/advanced/batchScrape/worker.js +179 -0
  27. package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
  28. package/src/tools/basic/_fetch.js +35 -0
  29. package/src/tools/basic/extractLinks.js +74 -0
  30. package/src/tools/basic/extractMetadata.js +74 -0
  31. package/src/tools/basic/extractText.js +46 -0
  32. package/src/tools/basic/fetchUrl.js +44 -0
  33. package/src/tools/basic/scrapeStructured.js +58 -0
  34. package/src/tools/crawl/_sessionContext.js +234 -0
  35. package/src/tools/crawl/crawlDeep.js +55 -5
  36. package/src/tools/crawl/mapSite.js +23 -2
  37. package/src/tools/extract/_fetchAndParse.js +57 -0
  38. package/src/tools/extract/extractStructured.js +3 -19
  39. package/src/tools/extract/extractWithLlm.js +365 -0
  40. package/src/tools/search/providers/searxng.js +126 -0
  41. package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
  42. package/src/tools/search/ranking/ResultRanker.js +17 -10
  43. package/src/tools/search/ranking/SearchResultCache.js +52 -0
  44. package/src/tools/search/searchWeb.js +112 -6
  45. package/src/tools/tracking/trackChanges/differ.js +98 -0
  46. package/src/tools/tracking/trackChanges/index.js +432 -0
  47. package/src/tools/tracking/trackChanges/monitor.js +93 -0
  48. package/src/tools/tracking/trackChanges/notifier.js +105 -0
  49. package/src/tools/tracking/trackChanges/schema.js +127 -0
  50. package/src/tools/tracking/trackChanges.js +12 -1374
@@ -4,11 +4,14 @@ import { CacheManager } from '../../core/cache/CacheManager.js';
4
4
  import { QueryExpander } from './queryExpander.js';
5
5
  import { ResultRanker } from './ranking/ResultRanker.js';
6
6
  import { ResultDeduplicator } from './ranking/ResultDeduplicator.js';
7
+ import { SearchResultCache } from './ranking/SearchResultCache.js';
7
8
  import LocalizationManager from '../../core/LocalizationManager.js';
8
9
  import { isCreatorModeVerified } from '../../core/creatorMode.js';
10
+ import { searchViaSearxng } from './providers/searxng.js';
9
11
 
10
12
  const SearchWebSchema = z.object({
11
13
  query: z.string().min(1),
14
+ provider: z.enum(['crawlforge', 'searxng']).optional().default('crawlforge'),
12
15
  limit: z.number().min(1).max(100).optional().default(10),
13
16
  offset: z.number().min(0).optional().default(0),
14
17
  lang: z.string().optional().default('en'),
@@ -92,13 +95,16 @@ export class SearchWebTool {
92
95
  }
93
96
 
94
97
  this.cache = cacheEnabled ? new CacheManager({ ttl: cacheTTL }) : null;
95
-
98
+
96
99
  // Initialize query expander
97
100
  this.queryExpander = new QueryExpander(expanderOptions);
98
-
99
- // Initialize ranking and deduplication systems
100
- this.resultRanker = new ResultRanker({ cacheEnabled, cacheTTL, ...rankingOptions });
101
- this.resultDeduplicator = new ResultDeduplicator({ cacheEnabled, cacheTTL, ...deduplicationOptions });
101
+
102
+ // Shared cache for ranking + deduplication — avoids two separate LRU instances
103
+ const sharedRankingCache = new SearchResultCache({ ttl: cacheTTL, enabled: cacheEnabled });
104
+
105
+ // Initialize ranking and deduplication systems (both share the same cache)
106
+ this.resultRanker = new ResultRanker({ cacheEnabled, cacheTTL, sharedCache: sharedRankingCache, ...rankingOptions });
107
+ this.resultDeduplicator = new ResultDeduplicator({ cacheEnabled, cacheTTL, sharedCache: sharedRankingCache, ...deduplicationOptions });
102
108
 
103
109
  // Initialize localization manager
104
110
  this.localizationManager = new LocalizationManager({
@@ -110,7 +116,13 @@ export class SearchWebTool {
110
116
  async execute(params) {
111
117
  try {
112
118
  const validated = SearchWebSchema.parse(params);
113
-
119
+
120
+ // --- SearXNG provider short-circuit ---
121
+ if (validated.provider === 'searxng') {
122
+ return await this._executeViaSearxng(validated);
123
+ }
124
+ // --- end SearXNG short-circuit ---
125
+
114
126
  // Apply localization if specified
115
127
  let localizedParams = validated;
116
128
  if (validated.localization) {
@@ -336,6 +348,100 @@ export class SearchWebTool {
336
348
  }
337
349
  }
338
350
 
351
+ /**
352
+ * Execute search via a self-hosted SearXNG instance.
353
+ * Results are normalised to the same shape as the CrawlForge/Google path.
354
+ *
355
+ * @param {Object} validated - Parsed & validated parameters from SearchWebSchema
356
+ * @returns {Promise<Object>} Standard search_web response object
357
+ */
358
+ async _executeViaSearxng(validated) {
359
+ // page is 1-based; offset is 0-based items, so map via limit
360
+ const page = Math.floor(validated.offset / validated.limit) + 1;
361
+
362
+ const adapterResult = await searchViaSearxng({
363
+ query: validated.query,
364
+ limit: validated.limit,
365
+ page,
366
+ safeSearch: validated.safe_search,
367
+ language: validated.lang
368
+ });
369
+
370
+ // Run through shared post-processing (deduplication, ranking)
371
+ let processedResults = await this.processResults(adapterResult);
372
+
373
+ let deduplicationInfo = null;
374
+ if (validated.enable_deduplication && processedResults.length > 1) {
375
+ const dedupeOptions = validated.deduplication_thresholds
376
+ ? { thresholds: validated.deduplication_thresholds }
377
+ : {};
378
+ const originalCount = processedResults.length;
379
+ processedResults = await this.resultDeduplicator.deduplicateResults(
380
+ processedResults,
381
+ dedupeOptions
382
+ );
383
+ deduplicationInfo = {
384
+ originalCount,
385
+ finalCount: processedResults.length,
386
+ duplicatesRemoved: originalCount - processedResults.length,
387
+ deduplicationRate:
388
+ ((originalCount - processedResults.length) / originalCount * 100).toFixed(1) + '%'
389
+ };
390
+ }
391
+
392
+ let rankingInfo = null;
393
+ if (validated.enable_ranking && processedResults.length > 1) {
394
+ const rankingOptions = validated.ranking_weights
395
+ ? { weights: validated.ranking_weights }
396
+ : {};
397
+ processedResults = await this.resultRanker.rankResults(
398
+ processedResults,
399
+ validated.query,
400
+ rankingOptions
401
+ );
402
+ rankingInfo = {
403
+ algorithmsUsed: ['bm25', 'semantic', 'authority', 'freshness'],
404
+ weightsApplied: this.resultRanker.options.weights,
405
+ totalResults: processedResults.length
406
+ };
407
+ }
408
+
409
+ if (!validated.include_ranking_details) {
410
+ processedResults = processedResults.map(({ rankingDetails, ...r }) => r);
411
+ }
412
+ if (!validated.include_deduplication_details) {
413
+ processedResults = processedResults.map(({ deduplicationInfo: _d, ...r }) => r);
414
+ }
415
+
416
+ return {
417
+ query: validated.query,
418
+ results: processedResults,
419
+ total_results: adapterResult.searchInformation?.totalResults || 0,
420
+ search_time: adapterResult.searchInformation?.searchTime || 0,
421
+ offset: validated.offset,
422
+ limit: validated.limit,
423
+ cached: false,
424
+ provider: {
425
+ name: 'searxng',
426
+ backend: 'SearXNG (self-hosted)',
427
+ instanceUrl: process.env.CRAWLFORGE_SEARXNG_URL || null,
428
+ capabilities: {
429
+ requiresApiKey: false,
430
+ supportsPagination: true,
431
+ supportsLanguageFilter: true,
432
+ supportsSafeSearch: true
433
+ }
434
+ },
435
+ localization: null,
436
+ processing: {
437
+ ranking: rankingInfo,
438
+ deduplication: deduplicationInfo,
439
+ query_expansion: null,
440
+ localization_applied: false
441
+ }
442
+ };
443
+ }
444
+
339
445
  async processResults(searchResults) {
340
446
  if (!searchResults.items || searchResults.items.length === 0) {
341
447
  return [];
@@ -0,0 +1,98 @@
1
+ /**
2
+ * TrackChanges — differ module.
3
+ * URL content fetching and history/stat helper functions.
4
+ */
5
+
6
/**
 * Fetch the HTML/text content of a URL with change-tracking headers.
 *
 * The request is made with the runtime's global `fetch`. That API ignores a
 * `timeout` init option, so the 30 s limit is enforced with an abort signal,
 * which also covers reading the response body.
 *
 * @param {string} url - URL to download.
 * @returns {Promise<{ content: string, metadata: Object }>} The response body
 *   as text plus status code, selected response headers and a fetch timestamp.
 * @throws {Error} "Failed to fetch content: …" on network failure, timeout or
 *   a non-2xx status; the underlying error is available as `error.cause`.
 */
export async function fetchContent(url) {
  try {
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Cache-Control': 'no-cache'
      },
      // Abort after 30 s so an unresponsive host cannot stall the caller.
      signal: AbortSignal.timeout(30000)
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const content = await response.text();

    return {
      content,
      metadata: {
        statusCode: response.status,
        contentType: response.headers.get('content-type'),
        // Length of the decoded string (UTF-16 code units), not the byte size.
        contentLength: content.length,
        lastModified: response.headers.get('last-modified'),
        etag: response.headers.get('etag'),
        fetchedAt: Date.now()
      }
    };
  } catch (error) {
    // Keep the original error reachable for callers that need to inspect it.
    throw new Error(`Failed to fetch content: ${error.message}`, { cause: error });
  }
}
45
+
46
/**
 * Merge change-tracker history entries with snapshot history entries.
 *
 * Every change-tracker entry is copied into the result. A snapshot entry whose
 * timestamp lies within 60 s of an entry already in the result is folded into
 * that entry (`hasSnapshot` / `snapshotId`); otherwise it is added as its own
 * entry. Input arrays and their entries are not mutated.
 *
 * @param {Array<Object>|null|undefined} changeHistory - Change-tracker entries;
 *   a missing list is treated as empty.
 * @param {Array<Object>|null|undefined} snapshotHistory - Snapshot entries;
 *   a missing list is treated as empty.
 * @returns {Array<Object>} Merged entries sorted by timestamp, newest first.
 */
export function mergeHistoryData(changeHistory, snapshotHistory) {
  // Either source may be absent (e.g. no snapshots stored yet); treat as empty.
  const merged = (changeHistory ?? []).map(entry => ({
    ...entry,
    source: 'change_tracker',
    hasSnapshot: false
  }));

  for (const entry of snapshotHistory ?? []) {
    const existing = merged.find(m => Math.abs(m.timestamp - entry.timestamp) < 60000);
    if (existing) {
      existing.hasSnapshot = true;
      existing.snapshotId = entry.snapshotId;
    } else {
      merged.push({ ...entry, source: 'snapshot', hasSnapshot: true });
    }
  }

  return merged.sort((a, b) => b.timestamp - a.timestamp);
}
69
+
70
/**
 * Check whether a history entry is at least as significant as a filter level.
 *
 * @param {Object} entry - History entry; a missing `significance` counts as 'none'.
 * @param {string} filter - Minimum level. A value outside the known scale has
 *   rank -1, so every entry passes it.
 * @returns {boolean} True when the entry's rank is at or above the filter's rank.
 */
export function matchesSignificanceFilter(entry, filter) {
  const scale = ['none', 'minor', 'moderate', 'major', 'critical'];
  const entryRank = scale.indexOf(entry.significance || 'none');
  const requiredRank = scale.indexOf(filter);
  return !(entryRank < requiredRank);
}
75
+
76
/**
 * Decide whether a significance level reaches a notification threshold.
 *
 * Both values are ranked on the scale none < minor < moderate < major <
 * critical; a value outside the scale ranks -1.
 *
 * @param {string} significance - Level of the detected change.
 * @param {string} threshold - Minimum level that should trigger a notification.
 * @returns {boolean} True when significance ranks at or above the threshold.
 */
export function meetsNotificationThreshold(significance, threshold) {
  const scale = ['none', 'minor', 'moderate', 'major', 'critical'];
  const rankOf = (label) => scale.indexOf(label);
  return !(rankOf(significance) < rankOf(threshold));
}
81
+
82
/**
 * Average time between consecutive changes in a history list.
 *
 * Computed as (latest − earliest) / (entries − 1). For a list sorted by
 * timestamp this equals the mean of the gaps between neighbours, and it gives
 * the same non-negative answer whichever direction the list is sorted in.
 *
 * @param {Array<{ timestamp: number }>|null|undefined} changeHistory - Entries
 *   carrying a numeric `timestamp`.
 * @returns {number|null} Mean interval in the timestamps' unit, or null when
 *   fewer than two entries are available.
 */
export function calculateAverageInterval(changeHistory) {
  if (!changeHistory || changeHistory.length < 2) return null;

  let earliest = Infinity;
  let latest = -Infinity;
  for (const { timestamp } of changeHistory) {
    // Math.min/Math.max propagate NaN, so malformed timestamps still yield NaN.
    earliest = Math.min(earliest, timestamp);
    latest = Math.max(latest, timestamp);
  }

  return (latest - earliest) / (changeHistory.length - 1);
}
90
+
91
/**
 * Count history entries per significance level.
 *
 * Entries without a `significance` are counted under 'none'; entries whose
 * level is not one of the five known levels are left out of the tally.
 *
 * @param {Array<Object>} changeHistory - History entries to tally.
 * @returns {{ none: number, minor: number, moderate: number, major: number, critical: number }}
 */
export function calculateSignificanceDistribution(changeHistory) {
  return changeHistory.reduce(
    (tally, record) => {
      const level = record.significance || 'none';
      if (Object.hasOwn(tally, level)) {
        tally[level] += 1;
      }
      return tally;
    },
    { none: 0, minor: 0, moderate: 0, major: 0, critical: 0 }
  );
}
@@ -0,0 +1,432 @@
1
+ /**
2
+ * TrackChanges — entry-point (index.js).
3
+ *
4
+ * Preserves the same exports as the original single-file trackChanges.js:
5
+ * export class TrackChangesTool
6
+ * export default TrackChangesTool
7
+ * export const trackChangesTool (singleton)
8
+ *
9
+ * Handler logic ≤ 150 LOC here; heavy work delegated to:
10
+ * schema.js — Zod input schema
11
+ * differ.js — fetch, merge-history, stat helpers
12
+ * monitor.js — polling monitor lifecycle
13
+ * notifier.js — webhook / email / Slack notifications
14
+ */
15
+
16
+ import { EventEmitter } from 'events';
17
+ import ChangeTracker from '../../../core/ChangeTracker.js';
18
+ import SnapshotManager from '../../../core/SnapshotManager.js';
19
+ import CacheManager from '../../../core/cache/CacheManager.js';
20
+ import { TrackChangesSchema } from './schema.js';
21
+ import { fetchContent, mergeHistoryData, matchesSignificanceFilter, calculateAverageInterval, calculateSignificanceDistribution } from './differ.js';
22
+ import { performMonitoringCheck, stopMonitor } from './monitor.js';
23
+ import { sendNotifications } from './notifier.js';
24
+
25
+ export class TrackChangesTool extends EventEmitter {
26
+ constructor(options = {}) {
27
+ super();
28
+
29
+ this.options = {
30
+ cacheEnabled: true,
31
+ cacheTTL: 3600000,
32
+ snapshotStorageDir: './snapshots',
33
+ enableRealTimeMonitoring: true,
34
+ maxConcurrentMonitors: 50,
35
+ defaultPollingInterval: 300000,
36
+ ...options
37
+ };
38
+
39
+ this.changeTracker = new ChangeTracker({
40
+ enableRealTimeTracking: this.options.enableRealTimeMonitoring,
41
+ enableSemanticAnalysis: false,
42
+ contentSimilarityThreshold: 0.8
43
+ });
44
+
45
+ this.snapshotManager = new SnapshotManager({
46
+ storageDir: this.options.snapshotStorageDir,
47
+ enableCompression: true,
48
+ enableDeltaStorage: true,
49
+ cacheEnabled: this.options.cacheEnabled
50
+ });
51
+
52
+ this.cache = this.options.cacheEnabled
53
+ ? new CacheManager({ ttl: this.options.cacheTTL })
54
+ : null;
55
+
56
+ this.activeMonitors = new Map();
57
+ this.monitorStats = new Map();
58
+
59
+ this.initialize();
60
+ }
61
+
62
+ async initialize() {
63
+ try {
64
+ await this.snapshotManager.initialize();
65
+ this._setupEventHandlers();
66
+ this.emit('initialized');
67
+ } catch (error) {
68
+ this.emit('error', { operation: 'initialize', error: error.message });
69
+ throw error;
70
+ }
71
+ }
72
+
73
+ _setupEventHandlers() {
74
+ this.changeTracker.on('changeDetected', async (changeRecord) => {
75
+ if (changeRecord.significance !== 'none') {
76
+ try {
77
+ await this.snapshotManager.storeSnapshot(
78
+ changeRecord.url,
79
+ changeRecord.details.current || '',
80
+ { changes: changeRecord.details, significance: changeRecord.significance, changeType: changeRecord.changeType }
81
+ );
82
+ } catch (error) {
83
+ this.emit('error', { operation: 'storeChangeSnapshot', url: changeRecord.url, error: error.message });
84
+ }
85
+ }
86
+ });
87
+
88
+ this.changeTracker.on('baselineCreated', (baseline) => this.emit('baselineCreated', baseline));
89
+ this.snapshotManager.on('snapshotStored', (snapshot) => this.emit('snapshotStored', snapshot));
90
+ this.snapshotManager.on('error', (error) => this.emit('error', error));
91
+ }
92
+
93
+ async execute(params) {
94
+ try {
95
+ const validated = TrackChangesSchema.parse(params);
96
+ const { operation } = validated;
97
+
98
+ switch (operation) {
99
+ case 'create_baseline': return await this.createBaseline(validated);
100
+ case 'compare': return await this.compareWithBaseline(validated);
101
+ case 'monitor': return await this.setupMonitoring(validated);
102
+ case 'get_history': return await this.getChangeHistory(validated);
103
+ case 'get_stats': return await this.getStatistics(validated);
104
+ case 'create_scheduled_monitor':return await this.createScheduledMonitor(validated);
105
+ case 'stop_scheduled_monitor': return await this.stopScheduledMonitor(validated);
106
+ case 'get_dashboard': return await this.getMonitoringDashboard(validated);
107
+ case 'export_history': return await this.exportHistoricalData(validated);
108
+ case 'create_alert_rule': return await this.createAlertRule(validated);
109
+ case 'generate_trend_report': return await this.generateTrendReport(validated);
110
+ case 'get_monitoring_templates':return await this.getMonitoringTemplates(validated);
111
+ default: throw new Error(`Unknown operation: ${operation}`);
112
+ }
113
+ } catch (error) {
114
+ return { success: false, error: error.message, timestamp: Date.now() };
115
+ }
116
+ }
117
+
118
+ async createBaseline(params) {
119
+ const { url, content, html, trackingOptions, storageOptions = {} } = params;
120
+ const enableSnapshots = storageOptions.enableSnapshots !== false;
121
+
122
+ let sourceContent = content || html;
123
+ let fetchMeta = {};
124
+ if (!sourceContent) {
125
+ const r = await fetchContent(url);
126
+ sourceContent = r.content;
127
+ fetchMeta = r.metadata;
128
+ }
129
+ if (!sourceContent || typeof sourceContent !== 'string') throw new Error('Invalid content');
130
+
131
+ const baseline = await this.changeTracker.createBaseline(url, sourceContent, trackingOptions);
132
+ let snapshotInfo = null;
133
+ if (enableSnapshots) {
134
+ snapshotInfo = await this.snapshotManager.storeSnapshot(url, sourceContent, { ...fetchMeta, baseline: true, trackingOptions });
135
+ }
136
+
137
+ return {
138
+ success: true, operation: 'create_baseline', url,
139
+ baseline: {
140
+ version: baseline.version,
141
+ contentHash: baseline.analysis?.hashes?.page,
142
+ sections: Object.keys(baseline.analysis?.hashes?.sections || {}).length,
143
+ elements: Object.keys(baseline.analysis?.hashes?.elements || {}).length,
144
+ createdAt: baseline.timestamp,
145
+ options: trackingOptions
146
+ },
147
+ snapshot: snapshotInfo, timestamp: Date.now()
148
+ };
149
+ }
150
+
151
+ async compareWithBaseline(params) {
152
+ const { url, content, html, trackingOptions, storageOptions = {}, notificationOptions } = params;
153
+ const enableSnapshots = storageOptions.enableSnapshots !== false;
154
+
155
+ let currentContent = content || html;
156
+ let fetchMeta = {};
157
+ if (!currentContent) {
158
+ const r = await fetchContent(url);
159
+ currentContent = r.content;
160
+ fetchMeta = r.metadata;
161
+ }
162
+ if (!currentContent || typeof currentContent !== 'string') throw new Error('Invalid content');
163
+
164
+ const comparisonResult = await this.changeTracker.compareWithBaseline(url, currentContent, trackingOptions);
165
+
166
+ let snapshotInfo = null;
167
+ if (comparisonResult.hasChanges && enableSnapshots) {
168
+ snapshotInfo = await this.snapshotManager.storeSnapshot(url, currentContent, {
169
+ ...fetchMeta, changes: comparisonResult.summary, significance: comparisonResult.significance
170
+ });
171
+ }
172
+
173
+ if (comparisonResult.hasChanges && notificationOptions) {
174
+ await sendNotifications(url, comparisonResult, notificationOptions, this);
175
+ }
176
+
177
+ return {
178
+ success: true, operation: 'compare', url,
179
+ hasChanges: comparisonResult.hasChanges,
180
+ significance: comparisonResult.significance,
181
+ changeType: comparisonResult.changeType,
182
+ summary: comparisonResult.summary,
183
+ details: comparisonResult.details,
184
+ metrics: comparisonResult.metrics,
185
+ recommendations: comparisonResult.recommendations,
186
+ snapshot: snapshotInfo, timestamp: Date.now()
187
+ };
188
+ }
189
+
190
+ async setupMonitoring(params) {
191
+ const { url, monitoringOptions, trackingOptions, storageOptions, notificationOptions } = params;
192
+
193
+ if (this.activeMonitors.has(url)) {
194
+ clearInterval(this.activeMonitors.get(url).timer);
195
+ }
196
+
197
+ const deps = { changeTracker: this.changeTracker, snapshotManager: this.snapshotManager, emitter: this };
198
+
199
+ const monitorConfig = {
200
+ url,
201
+ options: { ...monitoringOptions, trackingOptions, storageOptions, notificationOptions },
202
+ stats: { started: Date.now(), checks: 0, changesDetected: 0, errors: 0, lastCheck: null, lastChange: null, averageResponseTime: 0 }
203
+ };
204
+
205
+ monitorConfig.timer = setInterval(
206
+ () => performMonitoringCheck(url, monitorConfig, deps),
207
+ monitoringOptions.interval
208
+ );
209
+
210
+ this.activeMonitors.set(url, monitorConfig);
211
+ this.monitorStats.set(url, monitorConfig.stats);
212
+
213
+ await performMonitoringCheck(url, monitorConfig, deps);
214
+
215
+ return {
216
+ success: true, operation: 'monitor', url,
217
+ monitoring: { enabled: true, interval: monitoringOptions.interval, notificationThreshold: monitoringOptions.notificationThreshold, startedAt: monitorConfig.stats.started },
218
+ timestamp: Date.now()
219
+ };
220
+ }
221
+
222
+ async getChangeHistory(params) {
223
+ const { url, queryOptions } = params;
224
+
225
+ const changeHistory = this.changeTracker.getChangeHistory(url, queryOptions.limit);
226
+ const snapshotHistory = await this.snapshotManager.getChangeHistory(url, queryOptions);
227
+ let combined = mergeHistoryData(changeHistory, snapshotHistory.history);
228
+
229
+ if (queryOptions.significanceFilter && queryOptions.significanceFilter !== 'all') {
230
+ combined = combined.filter(e => matchesSignificanceFilter(e, queryOptions.significanceFilter));
231
+ }
232
+
233
+ const start = queryOptions.offset || 0;
234
+ const end = start + (queryOptions.limit || 50);
235
+
236
+ return {
237
+ success: true, operation: 'get_history', url,
238
+ history: combined.slice(start, end),
239
+ pagination: { total: combined.length, limit: queryOptions.limit, offset: queryOptions.offset, hasMore: end < combined.length },
240
+ timespan: {
241
+ earliest: combined.length > 0 ? combined[combined.length - 1].timestamp : null,
242
+ latest: combined.length > 0 ? combined[0].timestamp : null,
243
+ totalEntries: combined.length
244
+ },
245
+ timestamp: Date.now()
246
+ };
247
+ }
248
+
249
+ async getStatistics(params) {
250
+ const { url } = params;
251
+ const monitoringStats = url ? this.monitorStats.get(url) : this._getAggregatedMonitoringStats();
252
+ let urlStats = null;
253
+ if (url) {
254
+ try {
255
+ const changeHistory = this.changeTracker.getChangeHistory(url, 100);
256
+ const snapshotHistory = await this.snapshotManager.querySnapshots({ url, limit: 100, includeContent: false });
257
+ urlStats = {
258
+ totalChanges: changeHistory.length,
259
+ totalSnapshots: snapshotHistory.snapshots.length,
260
+ lastChange: changeHistory.length > 0 ? changeHistory[0].timestamp : null,
261
+ averageChangeInterval: calculateAverageInterval(changeHistory),
262
+ significanceDistribution: calculateSignificanceDistribution(changeHistory),
263
+ isBeingMonitored: this.activeMonitors.has(url)
264
+ };
265
+ } catch (error) {
266
+ urlStats = { error: error.message };
267
+ }
268
+ }
269
+
270
+ return {
271
+ success: true, operation: 'get_stats', url: url || 'global',
272
+ stats: {
273
+ changeTracking: this.changeTracker.getStats(),
274
+ snapshotStorage: this.snapshotManager.getStats(),
275
+ monitoring: monitoringStats,
276
+ urlSpecific: urlStats,
277
+ system: { activeMonitors: this.activeMonitors.size, cacheEnabled: !!this.cache, cacheStats: this.cache ? this.cache.getStats() : null }
278
+ },
279
+ timestamp: Date.now()
280
+ };
281
+ }
282
+
283
+ async createScheduledMonitor(params) {
284
+ const { url, scheduledMonitorOptions, trackingOptions, notificationOptions } = params;
285
+ const schedule = scheduledMonitorOptions?.schedule || '0 */1 * * *';
286
+ const templateId = scheduledMonitorOptions?.templateId;
287
+ let monitorOptions = { ...trackingOptions };
288
+ if (templateId && this.changeTracker.monitoringTemplates.has(templateId)) {
289
+ monitorOptions = { ...this.changeTracker.monitoringTemplates.get(templateId).options, ...monitorOptions };
290
+ }
291
+ const result = await this.changeTracker.createScheduledMonitor(url, schedule, {
292
+ ...monitorOptions,
293
+ alertRules: { threshold: 'moderate', methods: ['webhook'], throttle: 600000, ...notificationOptions }
294
+ });
295
+ return { success: true, operation: 'create_scheduled_monitor', url, monitor: result, template: templateId ? this.changeTracker.monitoringTemplates.get(templateId)?.name : null, timestamp: Date.now() };
296
+ }
297
+
298
+ async stopScheduledMonitor(params) {
299
+ const { url } = params;
300
+ let stoppedMonitors = 0;
301
+ for (const [id, monitor] of this.changeTracker.scheduledMonitors.entries()) {
302
+ if (monitor.url === url) {
303
+ monitor.cronJob?.destroy();
304
+ monitor.status = 'stopped';
305
+ this.changeTracker.scheduledMonitors.delete(id);
306
+ stoppedMonitors++;
307
+ }
308
+ }
309
+ return { success: true, operation: 'stop_scheduled_monitor', url, stoppedMonitors, timestamp: Date.now() };
310
+ }
311
+
312
+ async getMonitoringDashboard(params) {
313
+ const { dashboardOptions } = params;
314
+ const dashboard = this.changeTracker.getMonitoringDashboard();
315
+ if (!dashboardOptions?.includeRecentAlerts) delete dashboard.recentAlerts;
316
+ if (!dashboardOptions?.includeTrends) delete dashboard.trends;
317
+ if (!dashboardOptions?.includeMonitorStatus) {
318
+ dashboard.monitors = dashboard.monitors.map(m => ({ id: m.id, url: m.url, status: m.status }));
319
+ }
320
+ return { success: true, operation: 'get_dashboard', dashboard, timestamp: Date.now() };
321
+ }
322
+
323
+ async exportHistoricalData(params) {
324
+ const { url, exportOptions } = params;
325
+ const exportData = await this.changeTracker.exportHistoricalData({ ...exportOptions, url });
326
+ return { success: true, operation: 'export_history', url: url || 'global', export: exportData, timestamp: Date.now() };
327
+ }
328
+
329
+ async createAlertRule(params) {
330
+ const { alertRuleOptions } = params;
331
+ const ruleId = alertRuleOptions?.ruleId || `custom_rule_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
332
+ const rule = {
333
+ condition: this._parseCondition(alertRuleOptions?.condition || 'significance === "major"'),
334
+ actions: alertRuleOptions?.actions || ['webhook'],
335
+ throttle: alertRuleOptions?.throttle || 600000,
336
+ priority: alertRuleOptions?.priority || 'medium'
337
+ };
338
+ this.changeTracker.alertRules.set(ruleId, rule);
339
+ return { success: true, operation: 'create_alert_rule', ruleId, rule, timestamp: Date.now() };
340
+ }
341
+
342
+ async generateTrendReport(params) {
343
+ const report = await this.changeTracker.generateTrendAnalysisReport(params.url);
344
+ return { success: true, operation: 'generate_trend_report', report, timestamp: Date.now() };
345
+ }
346
+
347
+ async getMonitoringTemplates() {
348
+ const templates = {};
349
+ for (const [id, template] of this.changeTracker.monitoringTemplates.entries()) {
350
+ templates[id] = { name: template.name, frequency: template.frequency, options: template.options, alertRules: template.alertRules };
351
+ }
352
+ return { success: true, operation: 'get_monitoring_templates', templates, count: Object.keys(templates).length, timestamp: Date.now() };
353
+ }
354
+
355
+ // ── Public API ────────────────────────────────────────────────────────────────
356
+
357
+ stopMonitoring(url) {
358
+ if (!this.activeMonitors.has(url)) return false;
359
+ const monitorConfig = this.activeMonitors.get(url);
360
+ stopMonitor(url, monitorConfig, this);
361
+ this.activeMonitors.delete(url);
362
+ return true;
363
+ }
364
+
365
+ stopAllMonitoring() {
366
+ const urls = Array.from(this.activeMonitors.keys());
367
+ urls.forEach(url => this.stopMonitoring(url));
368
+ this.emit('allMonitoringStopped', { count: urls.length });
369
+ return urls.length;
370
+ }
371
+
372
+ getActiveMonitors() {
373
+ return Array.from(this.activeMonitors.keys()).map(url => ({
374
+ url,
375
+ config: this.activeMonitors.get(url).options,
376
+ stats: this.monitorStats.get(url)
377
+ }));
378
+ }
379
+
380
+ async shutdown() {
381
+ this.stopAllMonitoring();
382
+ await this.snapshotManager.shutdown();
383
+ await this.changeTracker.cleanup();
384
+ this.emit('shutdown');
385
+ }
386
+
387
+ // ── Private helpers ────────────────────────────────────────────────────────────
388
+
389
+ _getAggregatedMonitoringStats() {
390
+ const stats = { totalMonitors: this.activeMonitors.size, totalChecks: 0, totalChanges: 0, totalErrors: 0, averageResponseTime: 0, oldestMonitor: null, newestMonitor: null };
391
+ const all = Array.from(this.monitorStats.values());
392
+ if (all.length === 0) return stats;
393
+ stats.totalChecks = all.reduce((s, v) => s + v.checks, 0);
394
+ stats.totalChanges = all.reduce((s, v) => s + v.changesDetected, 0);
395
+ stats.totalErrors = all.reduce((s, v) => s + v.errors, 0);
396
+ stats.averageResponseTime = all.reduce((s, v) => s + v.averageResponseTime, 0) / all.length;
397
+ stats.oldestMonitor = Math.min(...all.map(v => v.started));
398
+ stats.newestMonitor = Math.max(...all.map(v => v.started));
399
+ return stats;
400
+ }
401
+
402
+ _parseCondition(conditionString) {
403
+ return (changeResult) => {
404
+ try {
405
+ if (conditionString.includes('significance')) {
406
+ const match = conditionString.match(/significance\s*===\s*["'](\w+)["']/);
407
+ if (match) return changeResult.significance === match[1];
408
+ }
409
+ return false;
410
+ } catch {
411
+ return false;
412
+ }
413
+ };
414
+ }
415
+ }
416
+
417
export default TrackChangesTool;

// Singleton instance — kept for backward-compat with any code that imports it directly
export const trackChangesTool = new TrackChangesTool();
trackChangesTool.name = 'track_changes';
// Validation delegates to the same Zod schema that `execute` parses with.
trackChangesTool.validateParameters = (params) => TrackChangesSchema.parse(params);
trackChangesTool.description = 'Track and analyze content changes with baseline capture, comparison, and monitoring capabilities';
// Hand-written JSON-schema summary of the tool input. The operation list below
// mirrors every case handled by the dispatcher in `execute`.
trackChangesTool.inputSchema = {
  type: 'object',
  properties: {
    url: { type: 'string', description: 'URL to track for changes' },
    operation: { type: 'string', description: 'Operation to perform: create_baseline, compare, monitor, get_history, get_stats, create_scheduled_monitor, stop_scheduled_monitor, get_dashboard, export_history, create_alert_rule, generate_trend_report, get_monitoring_templates' },
    content: { type: 'string', description: 'Content to analyze or compare' }
  },
  required: ['url']
};