crawlforge-mcp-server 3.0.18 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/server.js +192 -1277
- package/src/core/ActionExecutor.js +2 -43
- package/src/core/AuthManager.js +127 -14
- package/src/core/BrowserContextPool.js +187 -0
- package/src/core/JobManager.js +7 -5
- package/src/core/LocalizationManager.js +14 -125
- package/src/core/StealthBrowserManager.js +26 -18
- package/src/core/cache/CacheManager.js +4 -1
- package/src/core/crawlers/BFSCrawler.js +19 -5
- package/src/observability/metrics.js +137 -0
- package/src/observability/tracing.js +74 -0
- package/src/server/auth/oauth.js +388 -0
- package/src/server/registerTool.js +41 -0
- package/src/server/schemas/common.js +29 -0
- package/src/server/transports/http.js +22 -0
- package/src/server/transports/stdio.js +16 -0
- package/src/server/transports/streamableHttp.js +226 -0
- package/src/server/withAuth.js +121 -0
- package/src/tools/advanced/BatchScrapeTool.js +12 -1086
- package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
- package/src/tools/advanced/batchScrape/index.js +328 -0
- package/src/tools/advanced/batchScrape/queue.js +91 -0
- package/src/tools/advanced/batchScrape/reporter.js +26 -0
- package/src/tools/advanced/batchScrape/schema.js +37 -0
- package/src/tools/advanced/batchScrape/worker.js +179 -0
- package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
- package/src/tools/basic/_fetch.js +35 -0
- package/src/tools/basic/extractLinks.js +74 -0
- package/src/tools/basic/extractMetadata.js +74 -0
- package/src/tools/basic/extractText.js +46 -0
- package/src/tools/basic/fetchUrl.js +44 -0
- package/src/tools/basic/scrapeStructured.js +58 -0
- package/src/tools/crawl/_sessionContext.js +234 -0
- package/src/tools/crawl/crawlDeep.js +55 -5
- package/src/tools/crawl/mapSite.js +23 -2
- package/src/tools/extract/_fetchAndParse.js +57 -0
- package/src/tools/extract/extractStructured.js +3 -19
- package/src/tools/extract/extractWithLlm.js +365 -0
- package/src/tools/search/providers/searxng.js +126 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
- package/src/tools/search/ranking/ResultRanker.js +17 -10
- package/src/tools/search/ranking/SearchResultCache.js +52 -0
- package/src/tools/search/searchWeb.js +112 -6
- package/src/tools/tracking/trackChanges/differ.js +98 -0
- package/src/tools/tracking/trackChanges/index.js +432 -0
- package/src/tools/tracking/trackChanges/monitor.js +93 -0
- package/src/tools/tracking/trackChanges/notifier.js +105 -0
- package/src/tools/tracking/trackChanges/schema.js +127 -0
- package/src/tools/tracking/trackChanges.js +12 -1374
|
@@ -4,11 +4,14 @@ import { CacheManager } from '../../core/cache/CacheManager.js';
|
|
|
4
4
|
import { QueryExpander } from './queryExpander.js';
|
|
5
5
|
import { ResultRanker } from './ranking/ResultRanker.js';
|
|
6
6
|
import { ResultDeduplicator } from './ranking/ResultDeduplicator.js';
|
|
7
|
+
import { SearchResultCache } from './ranking/SearchResultCache.js';
|
|
7
8
|
import LocalizationManager from '../../core/LocalizationManager.js';
|
|
8
9
|
import { isCreatorModeVerified } from '../../core/creatorMode.js';
|
|
10
|
+
import { searchViaSearxng } from './providers/searxng.js';
|
|
9
11
|
|
|
10
12
|
const SearchWebSchema = z.object({
|
|
11
13
|
query: z.string().min(1),
|
|
14
|
+
provider: z.enum(['crawlforge', 'searxng']).optional().default('crawlforge'),
|
|
12
15
|
limit: z.number().min(1).max(100).optional().default(10),
|
|
13
16
|
offset: z.number().min(0).optional().default(0),
|
|
14
17
|
lang: z.string().optional().default('en'),
|
|
@@ -92,13 +95,16 @@ export class SearchWebTool {
|
|
|
92
95
|
}
|
|
93
96
|
|
|
94
97
|
this.cache = cacheEnabled ? new CacheManager({ ttl: cacheTTL }) : null;
|
|
95
|
-
|
|
98
|
+
|
|
96
99
|
// Initialize query expander
|
|
97
100
|
this.queryExpander = new QueryExpander(expanderOptions);
|
|
98
|
-
|
|
99
|
-
//
|
|
100
|
-
|
|
101
|
-
|
|
101
|
+
|
|
102
|
+
// Shared cache for ranking + deduplication — avoids two separate LRU instances
|
|
103
|
+
const sharedRankingCache = new SearchResultCache({ ttl: cacheTTL, enabled: cacheEnabled });
|
|
104
|
+
|
|
105
|
+
// Initialize ranking and deduplication systems (both share the same cache)
|
|
106
|
+
this.resultRanker = new ResultRanker({ cacheEnabled, cacheTTL, sharedCache: sharedRankingCache, ...rankingOptions });
|
|
107
|
+
this.resultDeduplicator = new ResultDeduplicator({ cacheEnabled, cacheTTL, sharedCache: sharedRankingCache, ...deduplicationOptions });
|
|
102
108
|
|
|
103
109
|
// Initialize localization manager
|
|
104
110
|
this.localizationManager = new LocalizationManager({
|
|
@@ -110,7 +116,13 @@ export class SearchWebTool {
|
|
|
110
116
|
async execute(params) {
|
|
111
117
|
try {
|
|
112
118
|
const validated = SearchWebSchema.parse(params);
|
|
113
|
-
|
|
119
|
+
|
|
120
|
+
// --- SearXNG provider short-circuit ---
|
|
121
|
+
if (validated.provider === 'searxng') {
|
|
122
|
+
return await this._executeViaSearxng(validated);
|
|
123
|
+
}
|
|
124
|
+
// --- end SearXNG short-circuit ---
|
|
125
|
+
|
|
114
126
|
// Apply localization if specified
|
|
115
127
|
let localizedParams = validated;
|
|
116
128
|
if (validated.localization) {
|
|
@@ -336,6 +348,100 @@ export class SearchWebTool {
|
|
|
336
348
|
}
|
|
337
349
|
}
|
|
338
350
|
|
|
351
|
+
/**
|
|
352
|
+
* Execute search via a self-hosted SearXNG instance.
|
|
353
|
+
* Results are normalised to the same shape as the CrawlForge/Google path.
|
|
354
|
+
*
|
|
355
|
+
* @param {Object} validated - Parsed & validated parameters from SearchWebSchema
|
|
356
|
+
* @returns {Promise<Object>} Standard search_web response object
|
|
357
|
+
*/
|
|
358
|
+
async _executeViaSearxng(validated) {
|
|
359
|
+
// page is 1-based; offset is 0-based items, so map via limit
|
|
360
|
+
const page = Math.floor(validated.offset / validated.limit) + 1;
|
|
361
|
+
|
|
362
|
+
const adapterResult = await searchViaSearxng({
|
|
363
|
+
query: validated.query,
|
|
364
|
+
limit: validated.limit,
|
|
365
|
+
page,
|
|
366
|
+
safeSearch: validated.safe_search,
|
|
367
|
+
language: validated.lang
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
// Run through shared post-processing (deduplication, ranking)
|
|
371
|
+
let processedResults = await this.processResults(adapterResult);
|
|
372
|
+
|
|
373
|
+
let deduplicationInfo = null;
|
|
374
|
+
if (validated.enable_deduplication && processedResults.length > 1) {
|
|
375
|
+
const dedupeOptions = validated.deduplication_thresholds
|
|
376
|
+
? { thresholds: validated.deduplication_thresholds }
|
|
377
|
+
: {};
|
|
378
|
+
const originalCount = processedResults.length;
|
|
379
|
+
processedResults = await this.resultDeduplicator.deduplicateResults(
|
|
380
|
+
processedResults,
|
|
381
|
+
dedupeOptions
|
|
382
|
+
);
|
|
383
|
+
deduplicationInfo = {
|
|
384
|
+
originalCount,
|
|
385
|
+
finalCount: processedResults.length,
|
|
386
|
+
duplicatesRemoved: originalCount - processedResults.length,
|
|
387
|
+
deduplicationRate:
|
|
388
|
+
((originalCount - processedResults.length) / originalCount * 100).toFixed(1) + '%'
|
|
389
|
+
};
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
let rankingInfo = null;
|
|
393
|
+
if (validated.enable_ranking && processedResults.length > 1) {
|
|
394
|
+
const rankingOptions = validated.ranking_weights
|
|
395
|
+
? { weights: validated.ranking_weights }
|
|
396
|
+
: {};
|
|
397
|
+
processedResults = await this.resultRanker.rankResults(
|
|
398
|
+
processedResults,
|
|
399
|
+
validated.query,
|
|
400
|
+
rankingOptions
|
|
401
|
+
);
|
|
402
|
+
rankingInfo = {
|
|
403
|
+
algorithmsUsed: ['bm25', 'semantic', 'authority', 'freshness'],
|
|
404
|
+
weightsApplied: this.resultRanker.options.weights,
|
|
405
|
+
totalResults: processedResults.length
|
|
406
|
+
};
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
if (!validated.include_ranking_details) {
|
|
410
|
+
processedResults = processedResults.map(({ rankingDetails, ...r }) => r);
|
|
411
|
+
}
|
|
412
|
+
if (!validated.include_deduplication_details) {
|
|
413
|
+
processedResults = processedResults.map(({ deduplicationInfo: _d, ...r }) => r);
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
return {
|
|
417
|
+
query: validated.query,
|
|
418
|
+
results: processedResults,
|
|
419
|
+
total_results: adapterResult.searchInformation?.totalResults || 0,
|
|
420
|
+
search_time: adapterResult.searchInformation?.searchTime || 0,
|
|
421
|
+
offset: validated.offset,
|
|
422
|
+
limit: validated.limit,
|
|
423
|
+
cached: false,
|
|
424
|
+
provider: {
|
|
425
|
+
name: 'searxng',
|
|
426
|
+
backend: 'SearXNG (self-hosted)',
|
|
427
|
+
instanceUrl: process.env.CRAWLFORGE_SEARXNG_URL || null,
|
|
428
|
+
capabilities: {
|
|
429
|
+
requiresApiKey: false,
|
|
430
|
+
supportsPagination: true,
|
|
431
|
+
supportsLanguageFilter: true,
|
|
432
|
+
supportsSafeSearch: true
|
|
433
|
+
}
|
|
434
|
+
},
|
|
435
|
+
localization: null,
|
|
436
|
+
processing: {
|
|
437
|
+
ranking: rankingInfo,
|
|
438
|
+
deduplication: deduplicationInfo,
|
|
439
|
+
query_expansion: null,
|
|
440
|
+
localization_applied: false
|
|
441
|
+
}
|
|
442
|
+
};
|
|
443
|
+
}
|
|
444
|
+
|
|
339
445
|
async processResults(searchResults) {
|
|
340
446
|
if (!searchResults.items || searchResults.items.length === 0) {
|
|
341
447
|
return [];
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrackChanges — differ module.
|
|
3
|
+
* URL content fetching and history/stat helper functions.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Fetch the HTML/text content of a URL with change-tracking headers.
 *
 * @param {string} url - Address to fetch.
 * @param {Object} [options]
 * @param {number} [options.timeoutMs=30000] - Abort the request after this many milliseconds.
 * @returns {Promise<{ content: string, metadata: Object }>} Body text plus response
 *   metadata (status code, content-type, length, last-modified, etag, fetch time).
 * @throws {Error} "Failed to fetch content: …" on network failure, timeout, or a
 *   non-2xx status; the underlying error is available as `error.cause`.
 */
export async function fetchContent(url, { timeoutMs = 30000 } = {}) {
  try {
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Cache-Control': 'no-cache'
      },
      // The standard fetch API has no `timeout` option (it is silently ignored),
      // so the time limit has to be enforced through an abort signal.
      signal: AbortSignal.timeout(timeoutMs)
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const content = await response.text();

    return {
      content,
      metadata: {
        statusCode: response.status,
        contentType: response.headers.get('content-type'),
        // Length of the decoded string (UTF-16 code units), not the byte size on the wire.
        contentLength: content.length,
        lastModified: response.headers.get('last-modified'),
        etag: response.headers.get('etag'),
        fetchedAt: Date.now()
      }
    };
  } catch (error) {
    // Keep the original error reachable for callers that need the root cause.
    throw new Error(`Failed to fetch content: ${error.message}`, { cause: error });
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Merge change-tracker history entries with snapshot history entries.
 *
 * Every change-tracker entry is copied and tagged `source: 'change_tracker'`.
 * A snapshot entry whose timestamp lies within 60 s of an already-merged entry
 * is folded into that entry (`hasSnapshot: true`, `snapshotId` copied over);
 * otherwise it is appended as its own entry tagged `source: 'snapshot'`.
 *
 * @param {Array<Object>|null|undefined} changeHistory - Entries with a numeric `timestamp`.
 * @param {Array<Object>|null|undefined} snapshotHistory - Entries with `timestamp` and `snapshotId`.
 * @returns {Array<Object>} Merged entries, newest first. Input entries are not mutated.
 */
export function mergeHistoryData(changeHistory, snapshotHistory) {
  const PROXIMITY_MS = 60000;

  // A missing history (undefined/null) is treated as "no entries" instead of throwing.
  const merged = (changeHistory ?? []).map(entry => ({
    ...entry,
    source: 'change_tracker',
    hasSnapshot: false
  }));

  for (const snapshot of snapshotHistory ?? []) {
    const nearby = merged.find(
      candidate => Math.abs(candidate.timestamp - snapshot.timestamp) < PROXIMITY_MS
    );

    if (nearby) {
      nearby.hasSnapshot = true;
      nearby.snapshotId = snapshot.snapshotId;
    } else {
      merged.push({ ...snapshot, source: 'snapshot', hasSnapshot: true });
    }
  }

  return merged.sort((a, b) => b.timestamp - a.timestamp);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Check whether a history entry is at least as significant as the filter level.
 *
 * Levels are ordered none < minor < moderate < major < critical. An entry
 * without a significance counts as 'none'. A filter value outside that list
 * resolves to index -1, so every entry with a known level passes it.
 *
 * @param {{ significance?: string }} entry
 * @param {string} filter - Minimum level to accept.
 * @returns {boolean}
 */
export function matchesSignificanceFilter(entry, filter) {
  const ranking = ['none', 'minor', 'moderate', 'major', 'critical'];
  const entryRank = ranking.indexOf(entry.significance || 'none');
  const requiredRank = ranking.indexOf(filter);
  return entryRank >= requiredRank;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Check whether a significance level reaches the notification threshold.
 *
 * Levels are ordered none < minor < moderate < major < critical; values not in
 * that list resolve to index -1.
 *
 * @param {string} significance - Level of the detected change.
 * @param {string} threshold - Minimum level that should trigger a notification.
 * @returns {boolean}
 */
export function meetsNotificationThreshold(significance, threshold) {
  const ranking = ['none', 'minor', 'moderate', 'major', 'critical'];
  const [actualRank, requiredRank] = [significance, threshold].map(level => ranking.indexOf(level));
  return actualRank >= requiredRank;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Average gap between consecutive history entries.
 *
 * Each gap is computed as `previous.timestamp - next.timestamp`, so the result
 * is positive when the history is ordered newest-first.
 *
 * @param {Array<{ timestamp: number }>} changeHistory
 * @returns {number|null} Mean gap, or null when there are fewer than two entries.
 */
export function calculateAverageInterval(changeHistory) {
  const entryCount = changeHistory.length;
  if (entryCount < 2) {
    return null;
  }

  let gapSum = 0;
  for (let idx = 0; idx + 1 < entryCount; idx += 1) {
    gapSum += changeHistory[idx].timestamp - changeHistory[idx + 1].timestamp;
  }

  return gapSum / (entryCount - 1);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Count history entries per significance level.
 *
 * Entries without a significance are counted as 'none'; entries whose
 * significance is not one of the five known levels are ignored.
 *
 * @param {Array<{ significance?: string }>} changeHistory
 * @returns {{ none: number, minor: number, moderate: number, major: number, critical: number }}
 */
export function calculateSignificanceDistribution(changeHistory) {
  return changeHistory.reduce(
    (counts, entry) => {
      const level = entry.significance || 'none';
      // Own-property check keeps keys such as 'toString' from being counted.
      if (Object.hasOwn(counts, level)) {
        counts[level] += 1;
      }
      return counts;
    },
    { none: 0, minor: 0, moderate: 0, major: 0, critical: 0 }
  );
}
|
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrackChanges — entry-point (index.js).
|
|
3
|
+
*
|
|
4
|
+
* Preserves the same exports as the original single-file trackChanges.js:
|
|
5
|
+
* export class TrackChangesTool
|
|
6
|
+
* export default TrackChangesTool
|
|
7
|
+
* export const trackChangesTool (singleton)
|
|
8
|
+
*
|
|
9
|
+
* Handler logic ≤ 150 LOC here; heavy work delegated to:
|
|
10
|
+
* schema.js — Zod input schema
|
|
11
|
+
* differ.js — fetch, merge-history, stat helpers
|
|
12
|
+
* monitor.js — polling monitor lifecycle
|
|
13
|
+
* notifier.js — webhook / email / Slack notifications
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { EventEmitter } from 'events';
|
|
17
|
+
import ChangeTracker from '../../../core/ChangeTracker.js';
|
|
18
|
+
import SnapshotManager from '../../../core/SnapshotManager.js';
|
|
19
|
+
import CacheManager from '../../../core/cache/CacheManager.js';
|
|
20
|
+
import { TrackChangesSchema } from './schema.js';
|
|
21
|
+
import { fetchContent, mergeHistoryData, matchesSignificanceFilter, calculateAverageInterval, calculateSignificanceDistribution } from './differ.js';
|
|
22
|
+
import { performMonitoringCheck, stopMonitor } from './monitor.js';
|
|
23
|
+
import { sendNotifications } from './notifier.js';
|
|
24
|
+
|
|
25
|
+
export class TrackChangesTool extends EventEmitter {
|
|
26
|
+
constructor(options = {}) {
|
|
27
|
+
super();
|
|
28
|
+
|
|
29
|
+
this.options = {
|
|
30
|
+
cacheEnabled: true,
|
|
31
|
+
cacheTTL: 3600000,
|
|
32
|
+
snapshotStorageDir: './snapshots',
|
|
33
|
+
enableRealTimeMonitoring: true,
|
|
34
|
+
maxConcurrentMonitors: 50,
|
|
35
|
+
defaultPollingInterval: 300000,
|
|
36
|
+
...options
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
this.changeTracker = new ChangeTracker({
|
|
40
|
+
enableRealTimeTracking: this.options.enableRealTimeMonitoring,
|
|
41
|
+
enableSemanticAnalysis: false,
|
|
42
|
+
contentSimilarityThreshold: 0.8
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
this.snapshotManager = new SnapshotManager({
|
|
46
|
+
storageDir: this.options.snapshotStorageDir,
|
|
47
|
+
enableCompression: true,
|
|
48
|
+
enableDeltaStorage: true,
|
|
49
|
+
cacheEnabled: this.options.cacheEnabled
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
this.cache = this.options.cacheEnabled
|
|
53
|
+
? new CacheManager({ ttl: this.options.cacheTTL })
|
|
54
|
+
: null;
|
|
55
|
+
|
|
56
|
+
this.activeMonitors = new Map();
|
|
57
|
+
this.monitorStats = new Map();
|
|
58
|
+
|
|
59
|
+
this.initialize();
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
async initialize() {
|
|
63
|
+
try {
|
|
64
|
+
await this.snapshotManager.initialize();
|
|
65
|
+
this._setupEventHandlers();
|
|
66
|
+
this.emit('initialized');
|
|
67
|
+
} catch (error) {
|
|
68
|
+
this.emit('error', { operation: 'initialize', error: error.message });
|
|
69
|
+
throw error;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
_setupEventHandlers() {
|
|
74
|
+
this.changeTracker.on('changeDetected', async (changeRecord) => {
|
|
75
|
+
if (changeRecord.significance !== 'none') {
|
|
76
|
+
try {
|
|
77
|
+
await this.snapshotManager.storeSnapshot(
|
|
78
|
+
changeRecord.url,
|
|
79
|
+
changeRecord.details.current || '',
|
|
80
|
+
{ changes: changeRecord.details, significance: changeRecord.significance, changeType: changeRecord.changeType }
|
|
81
|
+
);
|
|
82
|
+
} catch (error) {
|
|
83
|
+
this.emit('error', { operation: 'storeChangeSnapshot', url: changeRecord.url, error: error.message });
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
this.changeTracker.on('baselineCreated', (baseline) => this.emit('baselineCreated', baseline));
|
|
89
|
+
this.snapshotManager.on('snapshotStored', (snapshot) => this.emit('snapshotStored', snapshot));
|
|
90
|
+
this.snapshotManager.on('error', (error) => this.emit('error', error));
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
async execute(params) {
|
|
94
|
+
try {
|
|
95
|
+
const validated = TrackChangesSchema.parse(params);
|
|
96
|
+
const { operation } = validated;
|
|
97
|
+
|
|
98
|
+
switch (operation) {
|
|
99
|
+
case 'create_baseline': return await this.createBaseline(validated);
|
|
100
|
+
case 'compare': return await this.compareWithBaseline(validated);
|
|
101
|
+
case 'monitor': return await this.setupMonitoring(validated);
|
|
102
|
+
case 'get_history': return await this.getChangeHistory(validated);
|
|
103
|
+
case 'get_stats': return await this.getStatistics(validated);
|
|
104
|
+
case 'create_scheduled_monitor':return await this.createScheduledMonitor(validated);
|
|
105
|
+
case 'stop_scheduled_monitor': return await this.stopScheduledMonitor(validated);
|
|
106
|
+
case 'get_dashboard': return await this.getMonitoringDashboard(validated);
|
|
107
|
+
case 'export_history': return await this.exportHistoricalData(validated);
|
|
108
|
+
case 'create_alert_rule': return await this.createAlertRule(validated);
|
|
109
|
+
case 'generate_trend_report': return await this.generateTrendReport(validated);
|
|
110
|
+
case 'get_monitoring_templates':return await this.getMonitoringTemplates(validated);
|
|
111
|
+
default: throw new Error(`Unknown operation: ${operation}`);
|
|
112
|
+
}
|
|
113
|
+
} catch (error) {
|
|
114
|
+
return { success: false, error: error.message, timestamp: Date.now() };
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
async createBaseline(params) {
|
|
119
|
+
const { url, content, html, trackingOptions, storageOptions = {} } = params;
|
|
120
|
+
const enableSnapshots = storageOptions.enableSnapshots !== false;
|
|
121
|
+
|
|
122
|
+
let sourceContent = content || html;
|
|
123
|
+
let fetchMeta = {};
|
|
124
|
+
if (!sourceContent) {
|
|
125
|
+
const r = await fetchContent(url);
|
|
126
|
+
sourceContent = r.content;
|
|
127
|
+
fetchMeta = r.metadata;
|
|
128
|
+
}
|
|
129
|
+
if (!sourceContent || typeof sourceContent !== 'string') throw new Error('Invalid content');
|
|
130
|
+
|
|
131
|
+
const baseline = await this.changeTracker.createBaseline(url, sourceContent, trackingOptions);
|
|
132
|
+
let snapshotInfo = null;
|
|
133
|
+
if (enableSnapshots) {
|
|
134
|
+
snapshotInfo = await this.snapshotManager.storeSnapshot(url, sourceContent, { ...fetchMeta, baseline: true, trackingOptions });
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
return {
|
|
138
|
+
success: true, operation: 'create_baseline', url,
|
|
139
|
+
baseline: {
|
|
140
|
+
version: baseline.version,
|
|
141
|
+
contentHash: baseline.analysis?.hashes?.page,
|
|
142
|
+
sections: Object.keys(baseline.analysis?.hashes?.sections || {}).length,
|
|
143
|
+
elements: Object.keys(baseline.analysis?.hashes?.elements || {}).length,
|
|
144
|
+
createdAt: baseline.timestamp,
|
|
145
|
+
options: trackingOptions
|
|
146
|
+
},
|
|
147
|
+
snapshot: snapshotInfo, timestamp: Date.now()
|
|
148
|
+
};
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
async compareWithBaseline(params) {
|
|
152
|
+
const { url, content, html, trackingOptions, storageOptions = {}, notificationOptions } = params;
|
|
153
|
+
const enableSnapshots = storageOptions.enableSnapshots !== false;
|
|
154
|
+
|
|
155
|
+
let currentContent = content || html;
|
|
156
|
+
let fetchMeta = {};
|
|
157
|
+
if (!currentContent) {
|
|
158
|
+
const r = await fetchContent(url);
|
|
159
|
+
currentContent = r.content;
|
|
160
|
+
fetchMeta = r.metadata;
|
|
161
|
+
}
|
|
162
|
+
if (!currentContent || typeof currentContent !== 'string') throw new Error('Invalid content');
|
|
163
|
+
|
|
164
|
+
const comparisonResult = await this.changeTracker.compareWithBaseline(url, currentContent, trackingOptions);
|
|
165
|
+
|
|
166
|
+
let snapshotInfo = null;
|
|
167
|
+
if (comparisonResult.hasChanges && enableSnapshots) {
|
|
168
|
+
snapshotInfo = await this.snapshotManager.storeSnapshot(url, currentContent, {
|
|
169
|
+
...fetchMeta, changes: comparisonResult.summary, significance: comparisonResult.significance
|
|
170
|
+
});
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
if (comparisonResult.hasChanges && notificationOptions) {
|
|
174
|
+
await sendNotifications(url, comparisonResult, notificationOptions, this);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
return {
|
|
178
|
+
success: true, operation: 'compare', url,
|
|
179
|
+
hasChanges: comparisonResult.hasChanges,
|
|
180
|
+
significance: comparisonResult.significance,
|
|
181
|
+
changeType: comparisonResult.changeType,
|
|
182
|
+
summary: comparisonResult.summary,
|
|
183
|
+
details: comparisonResult.details,
|
|
184
|
+
metrics: comparisonResult.metrics,
|
|
185
|
+
recommendations: comparisonResult.recommendations,
|
|
186
|
+
snapshot: snapshotInfo, timestamp: Date.now()
|
|
187
|
+
};
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
async setupMonitoring(params) {
|
|
191
|
+
const { url, monitoringOptions, trackingOptions, storageOptions, notificationOptions } = params;
|
|
192
|
+
|
|
193
|
+
if (this.activeMonitors.has(url)) {
|
|
194
|
+
clearInterval(this.activeMonitors.get(url).timer);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
const deps = { changeTracker: this.changeTracker, snapshotManager: this.snapshotManager, emitter: this };
|
|
198
|
+
|
|
199
|
+
const monitorConfig = {
|
|
200
|
+
url,
|
|
201
|
+
options: { ...monitoringOptions, trackingOptions, storageOptions, notificationOptions },
|
|
202
|
+
stats: { started: Date.now(), checks: 0, changesDetected: 0, errors: 0, lastCheck: null, lastChange: null, averageResponseTime: 0 }
|
|
203
|
+
};
|
|
204
|
+
|
|
205
|
+
monitorConfig.timer = setInterval(
|
|
206
|
+
() => performMonitoringCheck(url, monitorConfig, deps),
|
|
207
|
+
monitoringOptions.interval
|
|
208
|
+
);
|
|
209
|
+
|
|
210
|
+
this.activeMonitors.set(url, monitorConfig);
|
|
211
|
+
this.monitorStats.set(url, monitorConfig.stats);
|
|
212
|
+
|
|
213
|
+
await performMonitoringCheck(url, monitorConfig, deps);
|
|
214
|
+
|
|
215
|
+
return {
|
|
216
|
+
success: true, operation: 'monitor', url,
|
|
217
|
+
monitoring: { enabled: true, interval: monitoringOptions.interval, notificationThreshold: monitoringOptions.notificationThreshold, startedAt: monitorConfig.stats.started },
|
|
218
|
+
timestamp: Date.now()
|
|
219
|
+
};
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
async getChangeHistory(params) {
|
|
223
|
+
const { url, queryOptions } = params;
|
|
224
|
+
|
|
225
|
+
const changeHistory = this.changeTracker.getChangeHistory(url, queryOptions.limit);
|
|
226
|
+
const snapshotHistory = await this.snapshotManager.getChangeHistory(url, queryOptions);
|
|
227
|
+
let combined = mergeHistoryData(changeHistory, snapshotHistory.history);
|
|
228
|
+
|
|
229
|
+
if (queryOptions.significanceFilter && queryOptions.significanceFilter !== 'all') {
|
|
230
|
+
combined = combined.filter(e => matchesSignificanceFilter(e, queryOptions.significanceFilter));
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
const start = queryOptions.offset || 0;
|
|
234
|
+
const end = start + (queryOptions.limit || 50);
|
|
235
|
+
|
|
236
|
+
return {
|
|
237
|
+
success: true, operation: 'get_history', url,
|
|
238
|
+
history: combined.slice(start, end),
|
|
239
|
+
pagination: { total: combined.length, limit: queryOptions.limit, offset: queryOptions.offset, hasMore: end < combined.length },
|
|
240
|
+
timespan: {
|
|
241
|
+
earliest: combined.length > 0 ? combined[combined.length - 1].timestamp : null,
|
|
242
|
+
latest: combined.length > 0 ? combined[0].timestamp : null,
|
|
243
|
+
totalEntries: combined.length
|
|
244
|
+
},
|
|
245
|
+
timestamp: Date.now()
|
|
246
|
+
};
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
async getStatistics(params) {
|
|
250
|
+
const { url } = params;
|
|
251
|
+
const monitoringStats = url ? this.monitorStats.get(url) : this._getAggregatedMonitoringStats();
|
|
252
|
+
let urlStats = null;
|
|
253
|
+
if (url) {
|
|
254
|
+
try {
|
|
255
|
+
const changeHistory = this.changeTracker.getChangeHistory(url, 100);
|
|
256
|
+
const snapshotHistory = await this.snapshotManager.querySnapshots({ url, limit: 100, includeContent: false });
|
|
257
|
+
urlStats = {
|
|
258
|
+
totalChanges: changeHistory.length,
|
|
259
|
+
totalSnapshots: snapshotHistory.snapshots.length,
|
|
260
|
+
lastChange: changeHistory.length > 0 ? changeHistory[0].timestamp : null,
|
|
261
|
+
averageChangeInterval: calculateAverageInterval(changeHistory),
|
|
262
|
+
significanceDistribution: calculateSignificanceDistribution(changeHistory),
|
|
263
|
+
isBeingMonitored: this.activeMonitors.has(url)
|
|
264
|
+
};
|
|
265
|
+
} catch (error) {
|
|
266
|
+
urlStats = { error: error.message };
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
return {
|
|
271
|
+
success: true, operation: 'get_stats', url: url || 'global',
|
|
272
|
+
stats: {
|
|
273
|
+
changeTracking: this.changeTracker.getStats(),
|
|
274
|
+
snapshotStorage: this.snapshotManager.getStats(),
|
|
275
|
+
monitoring: monitoringStats,
|
|
276
|
+
urlSpecific: urlStats,
|
|
277
|
+
system: { activeMonitors: this.activeMonitors.size, cacheEnabled: !!this.cache, cacheStats: this.cache ? this.cache.getStats() : null }
|
|
278
|
+
},
|
|
279
|
+
timestamp: Date.now()
|
|
280
|
+
};
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
async createScheduledMonitor(params) {
|
|
284
|
+
const { url, scheduledMonitorOptions, trackingOptions, notificationOptions } = params;
|
|
285
|
+
const schedule = scheduledMonitorOptions?.schedule || '0 */1 * * *';
|
|
286
|
+
const templateId = scheduledMonitorOptions?.templateId;
|
|
287
|
+
let monitorOptions = { ...trackingOptions };
|
|
288
|
+
if (templateId && this.changeTracker.monitoringTemplates.has(templateId)) {
|
|
289
|
+
monitorOptions = { ...this.changeTracker.monitoringTemplates.get(templateId).options, ...monitorOptions };
|
|
290
|
+
}
|
|
291
|
+
const result = await this.changeTracker.createScheduledMonitor(url, schedule, {
|
|
292
|
+
...monitorOptions,
|
|
293
|
+
alertRules: { threshold: 'moderate', methods: ['webhook'], throttle: 600000, ...notificationOptions }
|
|
294
|
+
});
|
|
295
|
+
return { success: true, operation: 'create_scheduled_monitor', url, monitor: result, template: templateId ? this.changeTracker.monitoringTemplates.get(templateId)?.name : null, timestamp: Date.now() };
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
async stopScheduledMonitor(params) {
|
|
299
|
+
const { url } = params;
|
|
300
|
+
let stoppedMonitors = 0;
|
|
301
|
+
for (const [id, monitor] of this.changeTracker.scheduledMonitors.entries()) {
|
|
302
|
+
if (monitor.url === url) {
|
|
303
|
+
monitor.cronJob?.destroy();
|
|
304
|
+
monitor.status = 'stopped';
|
|
305
|
+
this.changeTracker.scheduledMonitors.delete(id);
|
|
306
|
+
stoppedMonitors++;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
return { success: true, operation: 'stop_scheduled_monitor', url, stoppedMonitors, timestamp: Date.now() };
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
async getMonitoringDashboard(params) {
|
|
313
|
+
const { dashboardOptions } = params;
|
|
314
|
+
const dashboard = this.changeTracker.getMonitoringDashboard();
|
|
315
|
+
if (!dashboardOptions?.includeRecentAlerts) delete dashboard.recentAlerts;
|
|
316
|
+
if (!dashboardOptions?.includeTrends) delete dashboard.trends;
|
|
317
|
+
if (!dashboardOptions?.includeMonitorStatus) {
|
|
318
|
+
dashboard.monitors = dashboard.monitors.map(m => ({ id: m.id, url: m.url, status: m.status }));
|
|
319
|
+
}
|
|
320
|
+
return { success: true, operation: 'get_dashboard', dashboard, timestamp: Date.now() };
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
async exportHistoricalData(params) {
|
|
324
|
+
const { url, exportOptions } = params;
|
|
325
|
+
const exportData = await this.changeTracker.exportHistoricalData({ ...exportOptions, url });
|
|
326
|
+
return { success: true, operation: 'export_history', url: url || 'global', export: exportData, timestamp: Date.now() };
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
async createAlertRule(params) {
|
|
330
|
+
const { alertRuleOptions } = params;
|
|
331
|
+
const ruleId = alertRuleOptions?.ruleId || `custom_rule_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
|
332
|
+
const rule = {
|
|
333
|
+
condition: this._parseCondition(alertRuleOptions?.condition || 'significance === "major"'),
|
|
334
|
+
actions: alertRuleOptions?.actions || ['webhook'],
|
|
335
|
+
throttle: alertRuleOptions?.throttle || 600000,
|
|
336
|
+
priority: alertRuleOptions?.priority || 'medium'
|
|
337
|
+
};
|
|
338
|
+
this.changeTracker.alertRules.set(ruleId, rule);
|
|
339
|
+
return { success: true, operation: 'create_alert_rule', ruleId, rule, timestamp: Date.now() };
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
async generateTrendReport(params) {
|
|
343
|
+
const report = await this.changeTracker.generateTrendAnalysisReport(params.url);
|
|
344
|
+
return { success: true, operation: 'generate_trend_report', report, timestamp: Date.now() };
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
async getMonitoringTemplates() {
|
|
348
|
+
const templates = {};
|
|
349
|
+
for (const [id, template] of this.changeTracker.monitoringTemplates.entries()) {
|
|
350
|
+
templates[id] = { name: template.name, frequency: template.frequency, options: template.options, alertRules: template.alertRules };
|
|
351
|
+
}
|
|
352
|
+
return { success: true, operation: 'get_monitoring_templates', templates, count: Object.keys(templates).length, timestamp: Date.now() };
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
// ── Public API ────────────────────────────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
stopMonitoring(url) {
|
|
358
|
+
if (!this.activeMonitors.has(url)) return false;
|
|
359
|
+
const monitorConfig = this.activeMonitors.get(url);
|
|
360
|
+
stopMonitor(url, monitorConfig, this);
|
|
361
|
+
this.activeMonitors.delete(url);
|
|
362
|
+
return true;
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
stopAllMonitoring() {
|
|
366
|
+
const urls = Array.from(this.activeMonitors.keys());
|
|
367
|
+
urls.forEach(url => this.stopMonitoring(url));
|
|
368
|
+
this.emit('allMonitoringStopped', { count: urls.length });
|
|
369
|
+
return urls.length;
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
getActiveMonitors() {
|
|
373
|
+
return Array.from(this.activeMonitors.keys()).map(url => ({
|
|
374
|
+
url,
|
|
375
|
+
config: this.activeMonitors.get(url).options,
|
|
376
|
+
stats: this.monitorStats.get(url)
|
|
377
|
+
}));
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
async shutdown() {
|
|
381
|
+
this.stopAllMonitoring();
|
|
382
|
+
await this.snapshotManager.shutdown();
|
|
383
|
+
await this.changeTracker.cleanup();
|
|
384
|
+
this.emit('shutdown');
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// ── Private helpers ────────────────────────────────────────────────────────────
|
|
388
|
+
|
|
389
|
+
_getAggregatedMonitoringStats() {
|
|
390
|
+
const stats = { totalMonitors: this.activeMonitors.size, totalChecks: 0, totalChanges: 0, totalErrors: 0, averageResponseTime: 0, oldestMonitor: null, newestMonitor: null };
|
|
391
|
+
const all = Array.from(this.monitorStats.values());
|
|
392
|
+
if (all.length === 0) return stats;
|
|
393
|
+
stats.totalChecks = all.reduce((s, v) => s + v.checks, 0);
|
|
394
|
+
stats.totalChanges = all.reduce((s, v) => s + v.changesDetected, 0);
|
|
395
|
+
stats.totalErrors = all.reduce((s, v) => s + v.errors, 0);
|
|
396
|
+
stats.averageResponseTime = all.reduce((s, v) => s + v.averageResponseTime, 0) / all.length;
|
|
397
|
+
stats.oldestMonitor = Math.min(...all.map(v => v.started));
|
|
398
|
+
stats.newestMonitor = Math.max(...all.map(v => v.started));
|
|
399
|
+
return stats;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
_parseCondition(conditionString) {
|
|
403
|
+
return (changeResult) => {
|
|
404
|
+
try {
|
|
405
|
+
if (conditionString.includes('significance')) {
|
|
406
|
+
const match = conditionString.match(/significance\s*===\s*["'](\w+)["']/);
|
|
407
|
+
if (match) return changeResult.significance === match[1];
|
|
408
|
+
}
|
|
409
|
+
return false;
|
|
410
|
+
} catch {
|
|
411
|
+
return false;
|
|
412
|
+
}
|
|
413
|
+
};
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
export default TrackChangesTool;
|
|
418
|
+
|
|
419
|
+
// Singleton instance — kept for backward-compat with any code that imports it directly.
// Constructing it here starts snapshot-manager initialisation as an import side effect.
export const trackChangesTool = new TrackChangesTool();
trackChangesTool.name = 'track_changes';
trackChangesTool.validateParameters = (params) => TrackChangesSchema.parse(params);
trackChangesTool.description = 'Track and analyze content changes with baseline capture, comparison, and monitoring capabilities';
// Advertised JSON schema. The operation list mirrors the cases dispatched by
// TrackChangesTool.execute so that clients can discover every operation.
trackChangesTool.inputSchema = {
  type: 'object',
  properties: {
    url: { type: 'string', description: 'URL to track for changes' },
    operation: {
      type: 'string',
      enum: [
        'create_baseline',
        'compare',
        'monitor',
        'get_history',
        'get_stats',
        'create_scheduled_monitor',
        'stop_scheduled_monitor',
        'get_dashboard',
        'export_history',
        'create_alert_rule',
        'generate_trend_report',
        'get_monitoring_templates'
      ],
      description: 'Operation to perform: create_baseline, compare, monitor, get_history, get_stats, create_scheduled_monitor, stop_scheduled_monitor, get_dashboard, export_history, create_alert_rule, generate_trend_report, get_monitoring_templates'
    },
    content: { type: 'string', description: 'Content to analyze or compare' }
  },
  required: ['url']
};
|