crawlforge-mcp-server 3.0.18 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -2
- package/server.js +192 -1277
- package/src/core/ActionExecutor.js +2 -43
- package/src/core/AuthManager.js +127 -14
- package/src/core/BrowserContextPool.js +187 -0
- package/src/core/JobManager.js +7 -5
- package/src/core/LocalizationManager.js +14 -125
- package/src/core/StealthBrowserManager.js +26 -18
- package/src/core/cache/CacheManager.js +4 -1
- package/src/core/crawlers/BFSCrawler.js +19 -5
- package/src/observability/metrics.js +137 -0
- package/src/observability/tracing.js +74 -0
- package/src/server/auth/oauth.js +388 -0
- package/src/server/registerTool.js +41 -0
- package/src/server/schemas/common.js +29 -0
- package/src/server/transports/http.js +22 -0
- package/src/server/transports/stdio.js +16 -0
- package/src/server/transports/streamableHttp.js +226 -0
- package/src/server/withAuth.js +121 -0
- package/src/tools/advanced/BatchScrapeTool.js +12 -1086
- package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
- package/src/tools/advanced/batchScrape/index.js +328 -0
- package/src/tools/advanced/batchScrape/queue.js +91 -0
- package/src/tools/advanced/batchScrape/reporter.js +26 -0
- package/src/tools/advanced/batchScrape/schema.js +37 -0
- package/src/tools/advanced/batchScrape/worker.js +179 -0
- package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
- package/src/tools/basic/_fetch.js +35 -0
- package/src/tools/basic/extractLinks.js +74 -0
- package/src/tools/basic/extractMetadata.js +74 -0
- package/src/tools/basic/extractText.js +46 -0
- package/src/tools/basic/fetchUrl.js +44 -0
- package/src/tools/basic/scrapeStructured.js +58 -0
- package/src/tools/crawl/_sessionContext.js +234 -0
- package/src/tools/crawl/crawlDeep.js +55 -5
- package/src/tools/crawl/mapSite.js +23 -2
- package/src/tools/extract/_fetchAndParse.js +57 -0
- package/src/tools/extract/extractStructured.js +3 -19
- package/src/tools/extract/extractWithLlm.js +295 -0
- package/src/tools/search/providers/searxng.js +126 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
- package/src/tools/search/ranking/ResultRanker.js +17 -10
- package/src/tools/search/ranking/SearchResultCache.js +52 -0
- package/src/tools/search/searchWeb.js +112 -6
- package/src/tools/tracking/trackChanges/differ.js +98 -0
- package/src/tools/tracking/trackChanges/index.js +432 -0
- package/src/tools/tracking/trackChanges/monitor.js +93 -0
- package/src/tools/tracking/trackChanges/notifier.js +105 -0
- package/src/tools/tracking/trackChanges/schema.js +127 -0
- package/src/tools/tracking/trackChanges.js +12 -1374
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrackChanges — monitor module.
|
|
3
|
+
* Handles the polling monitor lifecycle: start, check, stop.
|
|
4
|
+
* Used by the TrackChangesTool class (index.js).
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { fetchContent, meetsNotificationThreshold } from './differ.js';
|
|
8
|
+
import { sendNotifications } from './notifier.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Perform a single monitoring check for a URL.
 *
 * Fetches the current content, compares it against the baseline through
 * changeTracker and updates monitorConfig.stats in place. When a change is
 * found and meets options.notificationThreshold, a snapshot is stored (if
 * options.storageOptions.enableSnapshots is set) and notifications are sent
 * (if options.notificationOptions is set). A 'monitoringCheck' event is
 * emitted after every check that completes.
 *
 * Errors thrown during the check are caught: they increment stats.errors and
 * emit 'monitoringError'. Once stats.errors exceeds options.maxRetries the
 * monitor is stopped and 'monitoringDisabled' is emitted. Note that this
 * function never resets stats.errors, so the limit applies to the cumulative
 * error count.
 *
 * @param {string} url
 * @param {Object} monitorConfig — reads .options, mutates .stats
 * @param {Object} deps — { changeTracker, snapshotManager, emitter }
 * @returns {Promise<void>}
 */
export async function performMonitoringCheck(url, monitorConfig, { changeTracker, snapshotManager, emitter }) {
  const startTime = Date.now();

  try {
    monitorConfig.stats.checks++;

    const fetchResult = await fetchContent(url);

    const comparisonResult = await changeTracker.compareWithBaseline(
      url,
      fetchResult.content,
      monitorConfig.options.trackingOptions
    );

    const responseTime = Date.now() - startTime;
    const { stats } = monitorConfig;

    // stats.checks also counts attempts that failed before producing a timing
    // sample. Average only over checks that were not recorded as errors so
    // failures do not drag the mean towards zero. The current check has been
    // counted in stats.checks but not in stats.errors, so the divisor is >= 1.
    const timedChecks = Math.max(1, stats.checks - (stats.errors || 0));
    stats.averageResponseTime =
      (stats.averageResponseTime * (timedChecks - 1) + responseTime) / timedChecks;

    stats.lastCheck = Date.now();

    if (comparisonResult.hasChanges) {
      stats.changesDetected++;
      stats.lastChange = Date.now();

      if (meetsNotificationThreshold(
        comparisonResult.significance,
        monitorConfig.options.notificationThreshold
      )) {
        if (monitorConfig.options.storageOptions?.enableSnapshots) {
          await snapshotManager.storeSnapshot(url, fetchResult.content, {
            ...fetchResult.metadata,
            changes: comparisonResult.summary,
            significance: comparisonResult.significance,
            monitoring: true
          });
        }

        if (monitorConfig.options.notificationOptions) {
          await sendNotifications(url, comparisonResult, monitorConfig.options.notificationOptions, emitter);
        }
      }
    }

    emitter?.emit('monitoringCheck', {
      url,
      hasChanges: comparisonResult.hasChanges,
      significance: comparisonResult.significance,
      responseTime,
      timestamp: Date.now()
    });
  } catch (error) {
    monitorConfig.stats.errors++;

    emitter?.emit('monitoringError', { url, error: error.message, timestamp: Date.now() });

    // Give up on this URL once the error budget is exhausted.
    if (monitorConfig.stats.errors > monitorConfig.options.maxRetries) {
      stopMonitor(url, monitorConfig, emitter);
      emitter?.emit('monitoringDisabled', {
        url,
        reason: 'Too many errors',
        totalErrors: monitorConfig.stats.errors
      });
    }
  }
}
|
|
84
|
+
|
|
85
|
+
/**
 * Stop a single active monitor.
 *
 * Clears the interval stored on monitorConfig.timer (if any) and resets the
 * field to null so the config no longer holds a stale handle and a repeated
 * call has nothing left to clear. A 'monitoringStopped' event is emitted on
 * every call, whether or not a timer was present.
 *
 * @param {string} url
 * @param {Object} [monitorConfig] — object whose .timer holds the interval handle
 * @param {Object} [emitter] — receives the 'monitoringStopped' event
 */
export function stopMonitor(url, monitorConfig, emitter) {
  if (monitorConfig?.timer) {
    clearInterval(monitorConfig.timer);
    // Drop the handle so the stopped state is visible on the config itself.
    monitorConfig.timer = null;
  }
  emitter?.emit('monitoringStopped', { url });
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrackChanges — notifier module.
|
|
3
|
+
* Handles webhook, email and Slack change notifications.
|
|
4
|
+
* Used by monitor.js and the main TrackChangesTool class.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Dispatch a detected change to every notification channel that is enabled.
 *
 * Channels are started in the order webhook, email, slack and then awaited
 * together with Promise.allSettled, so one channel settling with a rejection
 * does not prevent the others from completing.
 *
 * @param {string} url
 * @param {Object} changeResult
 * @param {Object} notificationOptions — per-channel configs keyed by webhook / email / slack
 * @param {EventEmitter} emitter — tool instance for event emission
 * @returns {Promise<void>}
 */
export async function sendNotifications(url, changeResult, notificationOptions, emitter) {
  // Insertion order of this table is the dispatch order.
  const senders = {
    webhook: sendWebhookNotification,
    email: sendEmailNotification,
    slack: sendSlackNotification
  };

  const pending = [];
  for (const [channel, send] of Object.entries(senders)) {
    const channelConfig = notificationOptions[channel];
    if (channelConfig?.enabled) {
      pending.push(send(url, changeResult, channelConfig, emitter));
    }
  }

  await Promise.allSettled(pending);
}
|
|
29
|
+
|
|
30
|
+
/**
 * Deliver a 'change_detected' payload to a configured webhook endpoint.
 *
 * The request is bounded by a timeout so an unresponsive endpoint cannot hold
 * up the caller. Emits 'notificationSent' on a 2xx response; any failure
 * (network error, timeout, non-2xx status, bad config) is caught and reported
 * as 'notificationError' instead of being thrown.
 *
 * @param {string} url — the monitored URL the change was detected on
 * @param {Object} changeResult — reads significance, changeType, summary, details
 * @param {Object} webhookConfig — reads url, method, headers, includeContent
 *   and an optional timeoutMs (defaults to 30000)
 * @param {EventEmitter} [emitter] — receives the outcome event
 * @returns {Promise<void>}
 */
export async function sendWebhookNotification(url, changeResult, webhookConfig, emitter) {
  try {
    const timeoutMs = webhookConfig.timeoutMs ?? 30000;

    const payload = {
      event: 'change_detected',
      url,
      timestamp: Date.now(),
      significance: changeResult.significance,
      changeType: changeResult.changeType,
      summary: changeResult.summary,
      // undefined is dropped by JSON.stringify, so details is omitted unless requested
      details: webhookConfig.includeContent ? changeResult.details : undefined
    };

    const response = await fetch(webhookConfig.url, {
      method: webhookConfig.method || 'POST',
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0',
        // Caller-supplied headers come last so they can override the defaults.
        ...webhookConfig.headers
      },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(timeoutMs)
    });

    if (!response.ok) {
      throw new Error(`Webhook failed: ${response.status} ${response.statusText}`);
    }

    emitter?.emit('notificationSent', { type: 'webhook', url, success: true });
  } catch (error) {
    emitter?.emit('notificationError', { type: 'webhook', url, error: error.message });
  }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Email channel placeholder.
 *
 * No email is sent. The function only emits a 'notificationSent' event for
 * the 'email' type, flagged success: true and carrying a note that a real
 * integration is required. changeResult and emailConfig are accepted for
 * signature parity with the other channels and are not read.
 *
 * @param {string} url
 * @param {Object} changeResult — unused
 * @param {Object} emailConfig — unused
 * @param {EventEmitter} [emitter] — receives the 'notificationSent' event
 * @returns {Promise<void>}
 */
export async function sendEmailNotification(url, changeResult, emailConfig, emitter) {
  // Email integration placeholder — requires external service
  if (emitter == null) {
    return;
  }

  const placeholderEvent = {
    type: 'email',
    url,
    success: true,
    note: 'Email notifications require external service integration'
  };
  emitter.emit('notificationSent', placeholderEvent);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Post a change notification to a Slack incoming-webhook URL.
 *
 * Builds a single-attachment message whose colour depends on the change
 * significance. Missing significance / changeType / summary fields fall back
 * to placeholder text instead of aborting the notification, and every
 * underscore in changeType is shown as a space. The request is bounded by a
 * timeout. Emits 'notificationSent' on a 2xx response; any failure is caught
 * and reported as 'notificationError' instead of being thrown.
 *
 * @param {string} url — the monitored URL the change was detected on
 * @param {Object} changeResult — reads significance, changeType, summary.changeDescription
 * @param {Object} slackConfig — reads webhookUrl, channel, username
 *   and an optional timeoutMs (defaults to 30000)
 * @param {EventEmitter} [emitter] — receives the outcome event
 * @returns {Promise<void>}
 */
export async function sendSlackNotification(url, changeResult, slackConfig, emitter) {
  try {
    const colors = { none: '#36a64f', minor: '#ffeb3b', moderate: '#ff9800', major: '#f44336', critical: '#9c27b0' };
    const timeoutMs = slackConfig.timeoutMs ?? 30000;

    const significance = String(changeResult.significance ?? 'unknown');
    const changeType = String(changeResult.changeType ?? 'unknown');
    const description = changeResult.summary?.changeDescription ?? 'No description available';

    const payload = {
      text: '🔄 Content Change Detected',
      attachments: [{
        // Unrecognised significance values use the same colour as 'none'.
        color: colors[significance] || '#36a64f',
        fields: [
          { title: 'URL', value: url, short: false },
          { title: 'Significance', value: significance.toUpperCase(), short: true },
          // replaceAll: change types may contain more than one underscore.
          { title: 'Change Type', value: changeType.replaceAll('_', ' '), short: true },
          { title: 'Summary', value: description, short: false }
        ],
        // Attachment timestamp is given in whole seconds, not milliseconds.
        timestamp: Math.floor(Date.now() / 1000)
      }],
      channel: slackConfig.channel,
      username: slackConfig.username || 'Change Tracker'
    };

    const response = await fetch(slackConfig.webhookUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(timeoutMs)
    });

    if (!response.ok) {
      throw new Error(`Slack notification failed: ${response.status}`);
    }

    emitter?.emit('notificationSent', { type: 'slack', url, success: true });
  } catch (error) {
    emitter?.emit('notificationError', { type: 'slack', url, error: error.message });
  }
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrackChanges — schema module.
|
|
3
|
+
* Centralises the Zod input schema so monitor.js, differ.js, notifier.js
|
|
4
|
+
* and the entry-point index.js can all import from one place.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { z } from 'zod';
|
|
8
|
+
|
|
9
|
+
// Accepted values for the top-level `operation` field.
const OPERATIONS = [
  'create_baseline',
  'compare',
  'monitor',
  'get_history',
  'get_stats',
  'create_scheduled_monitor',
  'stop_scheduled_monitor',
  'get_dashboard',
  'export_history',
  'create_alert_rule',
  'generate_trend_report',
  'get_monitoring_templates'
];

// Significance labels shared by notificationThreshold and significanceFilter.
const SIGNIFICANCE_LEVELS = ['minor', 'moderate', 'major', 'critical'];

// Default value for trackingOptions.excludeSelectors.
const DEFAULT_EXCLUDE_SELECTORS = ['script', 'style', 'noscript', '.advertisement', '.ad', '#comments'];

const ONE_DAY_MS = 24 * 60 * 60 * 1000;

// Small factories for the field shapes that repeat below. Each call builds a
// fresh schema instance, so no schema object is shared between fields.
const flag = (fallback) => z.boolean().default(fallback);
const ratio = (fallback) => z.number().min(0).max(1).default(fallback);
const optionalNumber = () => z.number().optional();
const optionalString = () => z.string().optional();
const optionalUrl = () => z.string().url().optional();

const trackingOptionsSchema = z.object({
  granularity: z.enum(['page', 'section', 'element', 'text']).default('section'),
  trackText: flag(true),
  trackStructure: flag(true),
  trackAttributes: flag(false),
  trackImages: flag(false),
  trackLinks: flag(true),
  ignoreWhitespace: flag(true),
  ignoreCase: flag(false),
  customSelectors: z.array(z.string()).optional(),
  excludeSelectors: z.array(z.string()).optional().default(DEFAULT_EXCLUDE_SELECTORS),
  significanceThresholds: z.object({
    minor: ratio(0.1),
    moderate: ratio(0.3),
    major: ratio(0.7)
  }).optional()
});

const monitoringOptionsSchema = z.object({
  enabled: flag(false),
  interval: z.number().min(60000).max(ONE_DAY_MS).default(300000),
  maxRetries: z.number().min(0).max(5).default(3),
  retryDelay: z.number().min(1000).max(60000).default(5000),
  notificationThreshold: z.enum(SIGNIFICANCE_LEVELS).default('moderate'),
  enableWebhook: flag(false),
  webhookUrl: optionalUrl(),
  webhookSecret: optionalString()
});

const storageOptionsSchema = z.object({
  enableSnapshots: flag(true),
  retainHistory: flag(true),
  maxHistoryEntries: z.number().min(1).max(1000).default(100),
  compressionEnabled: flag(true),
  deltaStorageEnabled: flag(true)
});

const queryOptionsSchema = z.object({
  limit: z.number().min(1).max(500).default(50),
  offset: z.number().min(0).default(0),
  startTime: optionalNumber(),
  endTime: optionalNumber(),
  includeContent: flag(false),
  significanceFilter: z.enum(['all', ...SIGNIFICANCE_LEVELS]).optional()
});

const emailChannelSchema = z.object({
  enabled: flag(false),
  recipients: z.array(z.string().email()).optional(),
  subject: optionalString(),
  includeDetails: flag(true)
});

const webhookChannelSchema = z.object({
  enabled: flag(false),
  url: optionalUrl(),
  method: z.enum(['POST', 'PUT']).default('POST'),
  headers: z.record(z.string()).optional(),
  signingSecret: optionalString(),
  includeContent: flag(false)
});

const slackChannelSchema = z.object({
  enabled: flag(false),
  webhookUrl: optionalUrl(),
  channel: optionalString(),
  username: optionalString()
});

const notificationOptionsSchema = z.object({
  email: emailChannelSchema.optional(),
  webhook: webhookChannelSchema.optional(),
  slack: slackChannelSchema.optional()
});

const scheduledMonitorOptionsSchema = z.object({
  schedule: optionalString(),
  templateId: optionalString(),
  enabled: flag(true)
});

const alertRuleOptionsSchema = z.object({
  ruleId: optionalString(),
  condition: optionalString(),
  actions: z.array(z.enum(['webhook', 'email', 'slack'])).optional(),
  throttle: z.number().min(0).optional(),
  priority: z.enum(['low', 'medium', 'high']).optional()
});

const exportOptionsSchema = z.object({
  format: z.enum(['json', 'csv']).default('json'),
  startTime: optionalNumber(),
  endTime: optionalNumber(),
  includeContent: flag(false),
  includeSnapshots: flag(false)
});

const dashboardOptionsSchema = z.object({
  includeRecentAlerts: flag(true),
  includeTrends: flag(true),
  includeMonitorStatus: flag(true)
});

/**
 * Zod input schema for the track-changes tool.
 *
 * Only `url` is required. `operation` defaults to 'compare'; trackingOptions
 * and storageOptions default to an empty object, and every other option group
 * is optional.
 */
export const TrackChangesSchema = z.object({
  url: z.string().url(),
  operation: z.enum(OPERATIONS).default('compare'),

  content: optionalString(),
  html: optionalString(),

  trackingOptions: trackingOptionsSchema.optional().default({}),
  monitoringOptions: monitoringOptionsSchema.optional(),
  storageOptions: storageOptionsSchema.optional().default({}),
  queryOptions: queryOptionsSchema.optional(),
  notificationOptions: notificationOptionsSchema.optional(),
  scheduledMonitorOptions: scheduledMonitorOptionsSchema.optional(),
  alertRuleOptions: alertRuleOptionsSchema.optional(),
  exportOptions: exportOptionsSchema.optional(),
  dashboardOptions: dashboardOptionsSchema.optional()
});
|