crawlforge-mcp-server 3.0.18 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/package.json +5 -2
  2. package/server.js +192 -1277
  3. package/src/core/ActionExecutor.js +2 -43
  4. package/src/core/AuthManager.js +127 -14
  5. package/src/core/BrowserContextPool.js +187 -0
  6. package/src/core/JobManager.js +7 -5
  7. package/src/core/LocalizationManager.js +14 -125
  8. package/src/core/StealthBrowserManager.js +26 -18
  9. package/src/core/cache/CacheManager.js +4 -1
  10. package/src/core/crawlers/BFSCrawler.js +19 -5
  11. package/src/observability/metrics.js +137 -0
  12. package/src/observability/tracing.js +74 -0
  13. package/src/server/auth/oauth.js +388 -0
  14. package/src/server/registerTool.js +41 -0
  15. package/src/server/schemas/common.js +29 -0
  16. package/src/server/transports/http.js +22 -0
  17. package/src/server/transports/stdio.js +16 -0
  18. package/src/server/transports/streamableHttp.js +226 -0
  19. package/src/server/withAuth.js +121 -0
  20. package/src/tools/advanced/BatchScrapeTool.js +12 -1086
  21. package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
  22. package/src/tools/advanced/batchScrape/index.js +328 -0
  23. package/src/tools/advanced/batchScrape/queue.js +91 -0
  24. package/src/tools/advanced/batchScrape/reporter.js +26 -0
  25. package/src/tools/advanced/batchScrape/schema.js +37 -0
  26. package/src/tools/advanced/batchScrape/worker.js +179 -0
  27. package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
  28. package/src/tools/basic/_fetch.js +35 -0
  29. package/src/tools/basic/extractLinks.js +74 -0
  30. package/src/tools/basic/extractMetadata.js +74 -0
  31. package/src/tools/basic/extractText.js +46 -0
  32. package/src/tools/basic/fetchUrl.js +44 -0
  33. package/src/tools/basic/scrapeStructured.js +58 -0
  34. package/src/tools/crawl/_sessionContext.js +234 -0
  35. package/src/tools/crawl/crawlDeep.js +55 -5
  36. package/src/tools/crawl/mapSite.js +23 -2
  37. package/src/tools/extract/_fetchAndParse.js +57 -0
  38. package/src/tools/extract/extractStructured.js +3 -19
  39. package/src/tools/extract/extractWithLlm.js +295 -0
  40. package/src/tools/search/providers/searxng.js +126 -0
  41. package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
  42. package/src/tools/search/ranking/ResultRanker.js +17 -10
  43. package/src/tools/search/ranking/SearchResultCache.js +52 -0
  44. package/src/tools/search/searchWeb.js +112 -6
  45. package/src/tools/tracking/trackChanges/differ.js +98 -0
  46. package/src/tools/tracking/trackChanges/index.js +432 -0
  47. package/src/tools/tracking/trackChanges/monitor.js +93 -0
  48. package/src/tools/tracking/trackChanges/notifier.js +105 -0
  49. package/src/tools/tracking/trackChanges/schema.js +127 -0
  50. package/src/tools/tracking/trackChanges.js +12 -1374
@@ -0,0 +1,93 @@
1
+ /**
2
+ * TrackChanges — monitor module.
3
+ * Handles the polling monitor lifecycle: start, check, stop.
4
+ * Used by the TrackChangesTool class (index.js).
5
+ */
6
+
7
+ import { fetchContent, meetsNotificationThreshold } from './differ.js';
8
+ import { sendNotifications } from './notifier.js';
9
+
10
/**
 * Perform a single monitoring check for a URL.
 *
 * Fetches the current content, compares it with the stored baseline and,
 * when the change meets the configured notification threshold, optionally
 * stores a snapshot and sends notifications. Mutates `monitorConfig.stats`
 * in place (`checks`, `averageResponseTime`, `responseSamples`, `lastCheck`,
 * `changesDetected`, `lastChange`, `errors`).
 *
 * Any error thrown inside the check is caught, counted in `stats.errors` and
 * reported through a `monitoringError` event; it is never re-thrown.
 *
 * @param {string} url - URL being monitored.
 * @param {Object} monitorConfig - Monitor state; reads `options` and mutates `stats`.
 * @param {Object} deps - `{ changeTracker, snapshotManager, emitter }`.
 * @returns {Promise<void>}
 */
export async function performMonitoringCheck(url, monitorConfig, { changeTracker, snapshotManager, emitter }) {
  const startTime = Date.now();

  try {
    monitorConfig.stats.checks++;

    const fetchResult = await fetchContent(url);

    const comparisonResult = await changeTracker.compareWithBaseline(
      url,
      fetchResult.content,
      monitorConfig.options.trackingOptions
    );

    const responseTime = Date.now() - startTime;

    // Average over the checks that actually produced a timing. `stats.checks`
    // also counts checks that failed before reaching this point, so using it
    // as the divisor would drag the average towards zero after every failure.
    const { stats } = monitorConfig;
    const priorSamples = stats.responseSamples ?? 0;
    stats.averageResponseTime =
      ((stats.averageResponseTime ?? 0) * priorSamples + responseTime) / (priorSamples + 1);
    stats.responseSamples = priorSamples + 1;

    stats.lastCheck = Date.now();

    if (comparisonResult.hasChanges) {
      stats.changesDetected++;
      stats.lastChange = Date.now();

      const shouldNotify = meetsNotificationThreshold(
        comparisonResult.significance,
        monitorConfig.options.notificationThreshold
      );

      if (shouldNotify) {
        if (monitorConfig.options.storageOptions?.enableSnapshots) {
          await snapshotManager.storeSnapshot(url, fetchResult.content, {
            ...fetchResult.metadata,
            changes: comparisonResult.summary,
            significance: comparisonResult.significance,
            monitoring: true
          });
        }

        if (monitorConfig.options.notificationOptions) {
          await sendNotifications(url, comparisonResult, monitorConfig.options.notificationOptions, emitter);
        }
      }
    }

    emitter?.emit('monitoringCheck', {
      url,
      hasChanges: comparisonResult.hasChanges,
      significance: comparisonResult.significance,
      responseTime,
      timestamp: Date.now()
    });
  } catch (error) {
    monitorConfig.stats.errors++;

    emitter?.emit('monitoringError', { url, error: error.message, timestamp: Date.now() });

    // NOTE(review): `stats.errors` is never reset on success, so this disables
    // the monitor after more than `maxRetries` errors in total, not in a row.
    // Confirm that cumulative counting is the intended policy.
    if (monitorConfig.stats.errors > monitorConfig.options.maxRetries) {
      stopMonitor(url, monitorConfig, emitter);
      emitter?.emit('monitoringDisabled', {
        url,
        reason: 'Too many errors',
        totalErrors: monitorConfig.stats.errors
      });
    }
  }
}
84
+
85
/**
 * Stop a single active monitor.
 *
 * Clears the interval stored on `monitorConfig.timer` (when present) and
 * drops the handle so the config no longer appears to own a running timer
 * and repeated stops are harmless. Always emits `monitoringStopped`.
 *
 * @param {string} url - URL whose monitor is being stopped.
 * @param {Object} [monitorConfig] - Monitor state holding the `timer` handle.
 * @param {Object} [emitter] - Receives the `monitoringStopped` event.
 */
export function stopMonitor(url, monitorConfig, emitter) {
  if (monitorConfig?.timer) {
    clearInterval(monitorConfig.timer);
    // Forget the dead handle; leaving it in place makes a stopped monitor
    // indistinguishable from a running one.
    monitorConfig.timer = null;
  }
  emitter?.emit('monitoringStopped', { url });
}
@@ -0,0 +1,105 @@
1
+ /**
2
+ * TrackChanges — notifier module.
3
+ * Handles webhook, email and Slack change notifications.
4
+ * Used by monitor.js and the main TrackChangesTool class.
5
+ */
6
+
7
/**
 * Fan a detected change out to every notification channel that is enabled.
 *
 * Channels are started in the order webhook, email, slack, and the function
 * resolves once all started deliveries have settled; a rejected delivery
 * does not reject this function.
 *
 * @param {string} url - URL on which the change was detected.
 * @param {Object} changeResult - Comparison result handed to each channel.
 * @param {Object} notificationOptions - Per-channel config (`webhook`, `email`, `slack`).
 * @param {EventEmitter} emitter - Tool instance used for event emission.
 * @returns {Promise<void>}
 */
export async function sendNotifications(url, changeResult, notificationOptions, emitter) {
  const channels = [
    ['webhook', sendWebhookNotification],
    ['email', sendEmailNotification],
    ['slack', sendSlackNotification]
  ];

  const pending = [];
  for (const [channel, deliver] of channels) {
    const channelConfig = notificationOptions[channel];
    if (channelConfig?.enabled) {
      pending.push(deliver(url, changeResult, channelConfig, emitter));
    }
  }

  await Promise.allSettled(pending);
}
29
+
30
/**
 * POST (or PUT) a `change_detected` payload to a configured webhook.
 *
 * Failures — missing URL, network error, timeout or a non-2xx response — are
 * reported through a `notificationError` event and never thrown; a successful
 * delivery emits `notificationSent`.
 *
 * @param {string} url - URL on which the change was detected.
 * @param {Object} changeResult - Reads `significance`, `changeType`, `summary`, `details`.
 * @param {Object} webhookConfig - Reads `url`, `method`, `headers`, `includeContent`
 *   and an optional `timeoutMs` (defaults to 30000).
 * @param {EventEmitter} [emitter] - Receives the outcome event.
 * @returns {Promise<void>}
 */
export async function sendWebhookNotification(url, changeResult, webhookConfig, emitter) {
  try {
    // The channel can be enabled without a target URL; fail with a clear
    // message instead of an opaque URL-parse error from fetch.
    if (!webhookConfig.url) {
      throw new Error('Webhook URL is not configured');
    }

    const payload = {
      event: 'change_detected',
      url,
      timestamp: Date.now(),
      significance: changeResult.significance,
      changeType: changeResult.changeType,
      summary: changeResult.summary,
      // undefined is dropped by JSON.stringify, so details are omitted unless requested.
      details: webhookConfig.includeContent ? changeResult.details : undefined
    };

    const response = await fetch(webhookConfig.url, {
      method: webhookConfig.method || 'POST',
      headers: {
        'Content-Type': 'application/json',
        'User-Agent': 'MCP-WebScraper-ChangeTracker/3.0',
        ...webhookConfig.headers
      },
      body: JSON.stringify(payload),
      // Bound the request so an unresponsive endpoint cannot stall the caller forever.
      signal: AbortSignal.timeout(webhookConfig.timeoutMs ?? 30000)
    });

    if (!response.ok) {
      throw new Error(`Webhook failed: ${response.status} ${response.statusText}`);
    }

    emitter?.emit('notificationSent', { type: 'webhook', url, success: true });
  } catch (error) {
    emitter?.emit('notificationError', { type: 'webhook', url, error: error.message });
  }
}
61
+
62
/**
 * Email channel placeholder.
 *
 * No mail is sent: the function only emits a `notificationSent` event whose
 * `note` states that an external service integration is required.
 * `changeResult` and `emailConfig` are accepted for signature parity with the
 * other channels and are not read.
 *
 * @param {string} url - URL on which the change was detected.
 * @param {Object} changeResult - Unused.
 * @param {Object} emailConfig - Unused.
 * @param {EventEmitter} [emitter] - Receives the placeholder event.
 * @returns {Promise<void>}
 */
export async function sendEmailNotification(url, changeResult, emailConfig, emitter) {
  const placeholderEvent = {
    type: 'email',
    url,
    success: true,
    note: 'Email notifications require external service integration'
  };

  emitter?.emit('notificationSent', placeholderEvent);
}
71
+
72
/**
 * Post a change summary to a Slack incoming webhook.
 *
 * Failures — missing webhook URL, malformed `changeResult`, network error,
 * timeout or a non-2xx response — are reported through a `notificationError`
 * event and never thrown; a successful delivery emits `notificationSent`.
 *
 * @param {string} url - URL on which the change was detected.
 * @param {Object} changeResult - Reads `significance`, `changeType` and
 *   `summary.changeDescription`.
 * @param {Object} slackConfig - Reads `webhookUrl`, `channel`, `username`
 *   and an optional `timeoutMs` (defaults to 30000).
 * @param {EventEmitter} [emitter] - Receives the outcome event.
 * @returns {Promise<void>}
 */
export async function sendSlackNotification(url, changeResult, slackConfig, emitter) {
  try {
    // The channel can be enabled without a webhook URL; fail with a clear
    // message instead of an opaque URL-parse error from fetch.
    if (!slackConfig.webhookUrl) {
      throw new Error('Slack webhook URL is not configured');
    }

    const colors = { none: '#36a64f', minor: '#ffeb3b', moderate: '#ff9800', major: '#f44336', critical: '#9c27b0' };
    const payload = {
      text: '🔄 Content Change Detected',
      attachments: [{
        color: colors[changeResult.significance] || '#36a64f',
        fields: [
          { title: 'URL', value: url, short: false },
          { title: 'Significance', value: changeResult.significance.toUpperCase(), short: true },
          // replaceAll: a string-pattern replace() would only swap the first underscore.
          { title: 'Change Type', value: changeResult.changeType.replaceAll('_', ' '), short: true },
          { title: 'Summary', value: changeResult.summary.changeDescription, short: false }
        ],
        // Epoch seconds, hence the division of the millisecond clock.
        timestamp: Math.floor(Date.now() / 1000)
      }],
      channel: slackConfig.channel,
      username: slackConfig.username || 'Change Tracker'
    };

    const response = await fetch(slackConfig.webhookUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      // Bound the request so an unresponsive endpoint cannot stall the caller forever.
      signal: AbortSignal.timeout(slackConfig.timeoutMs ?? 30000)
    });

    if (!response.ok) {
      throw new Error(`Slack notification failed: ${response.status}`);
    }

    emitter?.emit('notificationSent', { type: 'slack', url, success: true });
  } catch (error) {
    emitter?.emit('notificationError', { type: 'slack', url, error: error.message });
  }
}
@@ -0,0 +1,127 @@
1
+ /**
2
+ * TrackChanges — schema module.
3
+ * Centralises the Zod input schema so monitor.js, differ.js, notifier.js
4
+ * and the entry-point index.js can all import from one place.
5
+ */
6
+
7
+ import { z } from 'zod';
8
+
9
// Operations accepted by the `operation` field; `compare` is the default.
const trackChangesOperations = [
  'create_baseline',
  'compare',
  'monitor',
  'get_history',
  'get_stats',
  'create_scheduled_monitor',
  'stop_scheduled_monitor',
  'get_dashboard',
  'export_history',
  'create_alert_rule',
  'generate_trend_report',
  'get_monitoring_templates'
];

// Upper bound for the polling interval: one day in milliseconds.
const maxMonitoringIntervalMs = 24 * 60 * 60 * 1000;

// Fractions in [0, 1] separating the minor / moderate / major bands.
const significanceThresholdsSchema = z.object({
  minor: z.number().min(0).max(1).default(0.1),
  moderate: z.number().min(0).max(1).default(0.3),
  major: z.number().min(0).max(1).default(0.7)
});

// What is compared between two versions of a page and what is ignored.
const trackingOptionsSchema = z.object({
  granularity: z.enum(['page', 'section', 'element', 'text']).default('section'),
  trackText: z.boolean().default(true),
  trackStructure: z.boolean().default(true),
  trackAttributes: z.boolean().default(false),
  trackImages: z.boolean().default(false),
  trackLinks: z.boolean().default(true),
  ignoreWhitespace: z.boolean().default(true),
  ignoreCase: z.boolean().default(false),
  customSelectors: z.array(z.string()).optional(),
  excludeSelectors: z.array(z.string()).optional().default([
    'script', 'style', 'noscript', '.advertisement', '.ad', '#comments'
  ]),
  significanceThresholds: significanceThresholdsSchema.optional()
});

// Polling monitor settings: interval and retry delay are in milliseconds.
const monitoringOptionsSchema = z.object({
  enabled: z.boolean().default(false),
  interval: z.number().min(60000).max(maxMonitoringIntervalMs).default(300000),
  maxRetries: z.number().min(0).max(5).default(3),
  retryDelay: z.number().min(1000).max(60000).default(5000),
  notificationThreshold: z.enum(['minor', 'moderate', 'major', 'critical']).default('moderate'),
  enableWebhook: z.boolean().default(false),
  webhookUrl: z.string().url().optional(),
  webhookSecret: z.string().optional()
});

// Snapshot and history retention settings.
const storageOptionsSchema = z.object({
  enableSnapshots: z.boolean().default(true),
  retainHistory: z.boolean().default(true),
  maxHistoryEntries: z.number().min(1).max(1000).default(100),
  compressionEnabled: z.boolean().default(true),
  deltaStorageEnabled: z.boolean().default(true)
});

// Paging and filtering for history queries.
const queryOptionsSchema = z.object({
  limit: z.number().min(1).max(500).default(50),
  offset: z.number().min(0).default(0),
  startTime: z.number().optional(),
  endTime: z.number().optional(),
  includeContent: z.boolean().default(false),
  significanceFilter: z.enum(['all', 'minor', 'moderate', 'major', 'critical']).optional()
});

// Per-channel notification settings; every channel is optional and off by default.
const emailNotificationSchema = z.object({
  enabled: z.boolean().default(false),
  recipients: z.array(z.string().email()).optional(),
  subject: z.string().optional(),
  includeDetails: z.boolean().default(true)
});

const webhookNotificationSchema = z.object({
  enabled: z.boolean().default(false),
  url: z.string().url().optional(),
  method: z.enum(['POST', 'PUT']).default('POST'),
  headers: z.record(z.string()).optional(),
  signingSecret: z.string().optional(),
  includeContent: z.boolean().default(false)
});

const slackNotificationSchema = z.object({
  enabled: z.boolean().default(false),
  webhookUrl: z.string().url().optional(),
  channel: z.string().optional(),
  username: z.string().optional()
});

const notificationOptionsSchema = z.object({
  email: emailNotificationSchema.optional(),
  webhook: webhookNotificationSchema.optional(),
  slack: slackNotificationSchema.optional()
});

const scheduledMonitorOptionsSchema = z.object({
  schedule: z.string().optional(),
  templateId: z.string().optional(),
  enabled: z.boolean().default(true)
});

const alertRuleOptionsSchema = z.object({
  ruleId: z.string().optional(),
  condition: z.string().optional(),
  actions: z.array(z.enum(['webhook', 'email', 'slack'])).optional(),
  throttle: z.number().min(0).optional(),
  priority: z.enum(['low', 'medium', 'high']).optional()
});

const exportOptionsSchema = z.object({
  format: z.enum(['json', 'csv']).default('json'),
  startTime: z.number().optional(),
  endTime: z.number().optional(),
  includeContent: z.boolean().default(false),
  includeSnapshots: z.boolean().default(false)
});

const dashboardOptionsSchema = z.object({
  includeRecentAlerts: z.boolean().default(true),
  includeTrends: z.boolean().default(true),
  includeMonitorStatus: z.boolean().default(true)
});

/**
 * Zod input schema for the track-changes tool.
 *
 * `url` is the only required field. `operation` defaults to `compare`;
 * `trackingOptions` and `storageOptions` default to `{}`; every other option
 * group is optional.
 */
export const TrackChangesSchema = z.object({
  url: z.string().url(),
  operation: z.enum(trackChangesOperations).default('compare'),

  content: z.string().optional(),
  html: z.string().optional(),

  trackingOptions: trackingOptionsSchema.optional().default({}),
  monitoringOptions: monitoringOptionsSchema.optional(),
  storageOptions: storageOptionsSchema.optional().default({}),
  queryOptions: queryOptionsSchema.optional(),
  notificationOptions: notificationOptionsSchema.optional(),
  scheduledMonitorOptions: scheduledMonitorOptionsSchema.optional(),
  alertRuleOptions: alertRuleOptionsSchema.optional(),
  exportOptions: exportOptionsSchema.optional(),
  dashboardOptions: dashboardOptionsSchema.optional()
});