crawlforge-mcp-server 3.0.17 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CLAUDE.md +2 -0
  2. package/README.md +1 -0
  3. package/package.json +6 -2
  4. package/server.js +192 -1277
  5. package/src/constants/config.js +2 -1
  6. package/src/core/ActionExecutor.js +2 -43
  7. package/src/core/AuthManager.js +230 -32
  8. package/src/core/BrowserContextPool.js +187 -0
  9. package/src/core/JobManager.js +7 -5
  10. package/src/core/LocalizationManager.js +14 -125
  11. package/src/core/ResearchOrchestrator.js +86 -5
  12. package/src/core/StealthBrowserManager.js +26 -18
  13. package/src/core/cache/CacheManager.js +4 -1
  14. package/src/core/crawlers/BFSCrawler.js +19 -5
  15. package/src/core/endpointGuard.js +37 -0
  16. package/src/observability/metrics.js +137 -0
  17. package/src/observability/tracing.js +74 -0
  18. package/src/server/auth/oauth.js +388 -0
  19. package/src/server/registerTool.js +41 -0
  20. package/src/server/schemas/common.js +29 -0
  21. package/src/server/transports/http.js +22 -0
  22. package/src/server/transports/stdio.js +16 -0
  23. package/src/server/transports/streamableHttp.js +226 -0
  24. package/src/server/withAuth.js +121 -0
  25. package/src/tools/advanced/BatchScrapeTool.js +12 -1086
  26. package/src/tools/advanced/ScrapeWithActionsTool.js +105 -19
  27. package/src/tools/advanced/batchScrape/index.js +328 -0
  28. package/src/tools/advanced/batchScrape/queue.js +91 -0
  29. package/src/tools/advanced/batchScrape/reporter.js +26 -0
  30. package/src/tools/advanced/batchScrape/schema.js +37 -0
  31. package/src/tools/advanced/batchScrape/worker.js +179 -0
  32. package/src/tools/advanced/scrapeWithActions/recorder.js +188 -0
  33. package/src/tools/basic/_fetch.js +35 -0
  34. package/src/tools/basic/extractLinks.js +74 -0
  35. package/src/tools/basic/extractMetadata.js +74 -0
  36. package/src/tools/basic/extractText.js +46 -0
  37. package/src/tools/basic/fetchUrl.js +44 -0
  38. package/src/tools/basic/scrapeStructured.js +58 -0
  39. package/src/tools/crawl/_sessionContext.js +234 -0
  40. package/src/tools/crawl/crawlDeep.js +55 -5
  41. package/src/tools/crawl/mapSite.js +23 -2
  42. package/src/tools/extract/_fetchAndParse.js +57 -0
  43. package/src/tools/extract/extractStructured.js +3 -19
  44. package/src/tools/extract/extractWithLlm.js +295 -0
  45. package/src/tools/research/deepResearch.js +33 -8
  46. package/src/tools/search/providers/searxng.js +126 -0
  47. package/src/tools/search/ranking/ResultDeduplicator.js +18 -11
  48. package/src/tools/search/ranking/ResultRanker.js +17 -10
  49. package/src/tools/search/ranking/SearchResultCache.js +52 -0
  50. package/src/tools/search/searchWeb.js +112 -6
  51. package/src/tools/tracking/trackChanges/differ.js +98 -0
  52. package/src/tools/tracking/trackChanges/index.js +432 -0
  53. package/src/tools/tracking/trackChanges/monitor.js +93 -0
  54. package/src/tools/tracking/trackChanges/notifier.js +105 -0
  55. package/src/tools/tracking/trackChanges/schema.js +127 -0
  56. package/src/tools/tracking/trackChanges.js +12 -1374
@@ -0,0 +1,127 @@
1
+ /**
2
+ * TrackChanges — schema module.
3
+ * Centralises the Zod input schema so monitor.js, differ.js, notifier.js
4
+ * and the entry-point index.js can all import from one place.
5
+ */
6
+
7
+ import { z } from 'zod';
8
+
9
/** Every value accepted for the `operation` field, in declaration order. */
const OPERATION_NAMES = [
  'create_baseline',
  'compare',
  'monitor',
  'get_history',
  'get_stats',
  'create_scheduled_monitor',
  'stop_scheduled_monitor',
  'get_dashboard',
  'export_history',
  'create_alert_rule',
  'generate_trend_report',
  'get_monitoring_templates',
];

/** Significance labels shared by `notificationThreshold` and `significanceFilter`. */
const SIGNIFICANCE_LEVELS = ['minor', 'moderate', 'major', 'critical'];

/** Selectors used for `excludeSelectors` when the caller supplies none. */
const DEFAULT_EXCLUDED_SELECTORS = [
  'script', 'style', 'noscript', '.advertisement', '.ad', '#comments',
];

/**
 * Upper bound for `monitoringOptions.interval`.
 * NOTE(review): presumably milliseconds (one day), given the 60000 lower bound — confirm.
 */
const MAX_MONITOR_INTERVAL = 24 * 60 * 60 * 1000;

// Small factories so each field gets its own fresh Zod instance.
const flag = (initial) => z.boolean().default(initial);
const optionalString = () => z.string().optional();
const optionalUrl = () => z.string().url().optional();
const optionalNumber = () => z.number().optional();
const ratio = (initial) => z.number().min(0).max(1).default(initial);

/** Three optional 0..1 cut-offs; the whole group may be omitted. */
const significanceThresholds = z
  .object({
    minor: ratio(0.1),
    moderate: ratio(0.3),
    major: ratio(0.7),
  })
  .optional();

/** What to track and what to ignore; falls back to `{}` when omitted. */
const trackingOptions = z
  .object({
    granularity: z.enum(['page', 'section', 'element', 'text']).default('section'),
    trackText: flag(true),
    trackStructure: flag(true),
    trackAttributes: flag(false),
    trackImages: flag(false),
    trackLinks: flag(true),
    ignoreWhitespace: flag(true),
    ignoreCase: flag(false),
    customSelectors: z.array(z.string()).optional(),
    excludeSelectors: z.array(z.string()).optional().default(DEFAULT_EXCLUDED_SELECTORS),
    significanceThresholds,
  })
  .optional()
  .default({});

/** Polling, retry and webhook settings; optional as a group. */
const monitoringOptions = z
  .object({
    enabled: flag(false),
    interval: z.number().min(60000).max(MAX_MONITOR_INTERVAL).default(300000),
    maxRetries: z.number().min(0).max(5).default(3),
    retryDelay: z.number().min(1000).max(60000).default(5000),
    notificationThreshold: z.enum(SIGNIFICANCE_LEVELS).default('moderate'),
    enableWebhook: flag(false),
    webhookUrl: optionalUrl(),
    webhookSecret: optionalString(),
  })
  .optional();

/** Snapshot/history retention switches; falls back to `{}` when omitted. */
const storageOptions = z
  .object({
    enableSnapshots: flag(true),
    retainHistory: flag(true),
    maxHistoryEntries: z.number().min(1).max(1000).default(100),
    compressionEnabled: flag(true),
    deltaStorageEnabled: flag(true),
  })
  .optional()
  .default({});

/** Paging and filtering fields; optional as a group. */
const queryOptions = z
  .object({
    limit: z.number().min(1).max(500).default(50),
    offset: z.number().min(0).default(0),
    startTime: optionalNumber(),
    endTime: optionalNumber(),
    includeContent: flag(false),
    significanceFilter: z.enum(['all', ...SIGNIFICANCE_LEVELS]).optional(),
  })
  .optional();

/** Per-channel notification settings (email, webhook, slack); each channel is optional. */
const notificationOptions = z
  .object({
    email: z
      .object({
        enabled: flag(false),
        recipients: z.array(z.string().email()).optional(),
        subject: optionalString(),
        includeDetails: flag(true),
      })
      .optional(),
    webhook: z
      .object({
        enabled: flag(false),
        url: optionalUrl(),
        method: z.enum(['POST', 'PUT']).default('POST'),
        headers: z.record(z.string()).optional(),
        signingSecret: optionalString(),
        includeContent: flag(false),
      })
      .optional(),
    slack: z
      .object({
        enabled: flag(false),
        webhookUrl: optionalUrl(),
        channel: optionalString(),
        username: optionalString(),
      })
      .optional(),
  })
  .optional();

/** Schedule string, template id and enabled flag; optional as a group. */
const scheduledMonitorOptions = z
  .object({
    schedule: optionalString(),
    templateId: optionalString(),
    enabled: flag(true),
  })
  .optional();

/** Alert-rule fields; every field is optional, as is the group. */
const alertRuleOptions = z
  .object({
    ruleId: optionalString(),
    condition: optionalString(),
    actions: z.array(z.enum(['webhook', 'email', 'slack'])).optional(),
    throttle: z.number().min(0).optional(),
    priority: z.enum(['low', 'medium', 'high']).optional(),
  })
  .optional();

/** Export format and range fields; optional as a group. */
const exportOptions = z
  .object({
    format: z.enum(['json', 'csv']).default('json'),
    startTime: optionalNumber(),
    endTime: optionalNumber(),
    includeContent: flag(false),
    includeSnapshots: flag(false),
  })
  .optional();

/** Three include-switches, all defaulting to true; optional as a group. */
const dashboardOptions = z
  .object({
    includeRecentAlerts: flag(true),
    includeTrends: flag(true),
    includeMonitorStatus: flag(true),
  })
  .optional();

/**
 * Zod input schema for the TrackChanges tool.
 *
 * `url` is the only required field. `operation` defaults to 'compare'.
 * `trackingOptions` and `storageOptions` default to `{}` when omitted;
 * every other option group is simply optional.
 */
export const TrackChangesSchema = z.object({
  url: z.string().url(),
  operation: z.enum(OPERATION_NAMES).default('compare'),

  content: optionalString(),
  html: optionalString(),

  trackingOptions,
  monitoringOptions,
  storageOptions,
  queryOptions,
  notificationOptions,
  scheduledMonitorOptions,
  alertRuleOptions,
  exportOptions,
  dashboardOptions,
});