crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,315 @@
1
+ /**
2
+ * Authentication and Credit Management for CrawlForge MCP Server
3
+ * Handles API key validation, credit tracking, and usage reporting
4
+ */
5
+
6
+ import fetch from 'node-fetch';
7
+ import fs from 'fs/promises';
8
+ import path from 'path';
9
+
10
/**
 * Manages CrawlForge API-key authentication, credit checks and usage reporting.
 *
 * In-memory state: the loaded `config` object, a per-user `creditCache`
 * (userId -> remaining credits) and the timestamp of the last backend credit
 * check. The configuration is persisted as JSON at `configPath`.
 *
 * When the CRAWLFORGE_CREATOR_MODE environment variable is exactly 'true',
 * all backend calls are skipped and every check succeeds.
 *
 * NOTE(review): some methods log with console.log (stdout). If this server
 * talks MCP over stdio, confirm these messages cannot interleave with
 * protocol output.
 */
class AuthManager {
  constructor() {
    this.apiEndpoint = process.env.CRAWLFORGE_API_URL || 'https://api.crawlforge.com';
    // NOTE(review): path.join throws a TypeError when neither HOME nor
    // USERPROFILE is set; behaviour intentionally left unchanged here.
    this.configPath = path.join(process.env.HOME || process.env.USERPROFILE, '.crawlforge', 'config.json');
    this.config = null;
    this.creditCache = new Map();
    this.lastCreditCheck = null;
    this.CREDIT_CHECK_INTERVAL = 60000; // Re-query the backend at most once per minute
    this.initialized = false;
    this.creatorMode = process.env.CRAWLFORGE_CREATOR_MODE === 'true';
  }

  /**
   * Check if running in creator mode (unlimited access, no API required).
   * @returns {boolean}
   */
  isCreatorMode() {
    return this.creatorMode;
  }

  /**
   * Initialize the auth manager and load the stored config, if any.
   * Never throws: a missing or unreadable config is logged and the manager
   * is still marked initialized (with `config` left as null).
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initialized) return;

    // Creator mode needs no stored configuration.
    if (this.isCreatorMode()) {
      console.log('🚀 Creator Mode Active - Unlimited Access Enabled');
      this.initialized = true;
      return;
    }

    try {
      await this.loadConfig();
    } catch (error) {
      if (error.code === 'ENOENT') {
        console.log('No existing CrawlForge configuration found. Run setup to configure.');
      } else {
        // The file exists but is unreadable, corrupt or incomplete: say so
        // instead of claiming that no configuration exists.
        console.error(`Could not load CrawlForge configuration (${error.message}). Run setup to reconfigure.`);
      }
    }
    this.initialized = true;
  }

  /**
   * Load configuration from disk into `this.config`.
   * On any failure `this.config` is null and the error is rethrown.
   * @returns {Promise<void>}
   * @throws {Error} if the file cannot be read, is not valid JSON, or lacks
   *   `apiKey` / `userId`.
   */
  async loadConfig() {
    this.config = null;
    const configData = await fs.readFile(this.configPath, 'utf-8');
    const parsed = JSON.parse(configData);

    // `parsed` may legitimately be null or a primitive if the file is not an object.
    if (!parsed || !parsed.apiKey || !parsed.userId) {
      throw new Error('Invalid configuration');
    }
    this.config = parsed;
  }

  /**
   * Save configuration to disk and make it the active in-memory config.
   * The file holds the API key, so it is created owner-only.
   * @param {string} apiKey
   * @param {string} userId
   * @param {string} email
   * @returns {Promise<void>}
   * @throws {Error} if the directory or file cannot be written.
   */
  async saveConfig(apiKey, userId, email) {
    const config = {
      apiKey,
      userId,
      email,
      createdAt: new Date().toISOString(),
      version: '1.0.0'
    };

    // Create config directory if it doesn't exist (owner-only).
    const configDir = path.dirname(this.configPath);
    await fs.mkdir(configDir, { recursive: true, mode: 0o700 });

    await fs.writeFile(this.configPath, JSON.stringify(config, null, 2), { mode: 0o600 });

    // `mode` only applies when the file is created; tighten a pre-existing
    // file too. Best effort: some platforms/filesystems ignore or reject chmod.
    try {
      await fs.chmod(this.configPath, 0o600);
    } catch (error) {
      console.error('Could not restrict config file permissions:', error.message);
    }

    this.config = config;
  }

  /**
   * Setup wizard for first-time users: validates the key with the backend
   * and persists the resulting configuration.
   * @param {string} apiKey
   * @returns {Promise<boolean>} true when setup completed, false when the key
   *   is missing or rejected.
   */
  async runSetup(apiKey) {
    console.log('🔧 Setting up CrawlForge MCP Server...\n');

    if (!apiKey) {
      console.log('❌ API key is required for setup');
      console.log('Get your API key from: https://crawlforge.com/dashboard/api-keys');
      return false;
    }

    const validation = await this.validateApiKey(apiKey);

    if (!validation.valid) {
      console.log(`❌ Invalid API key: ${validation.error}`);
      return false;
    }

    await this.saveConfig(apiKey, validation.userId, validation.email);

    console.log('✅ Setup complete!');
    console.log(`📧 Account: ${validation.email}`);
    console.log(`💳 Credits remaining: ${validation.creditsRemaining}`);
    console.log(`📦 Plan: ${validation.planId}`);

    return true;
  }

  /**
   * Validate an API key with the backend. Never throws.
   * @param {string} apiKey
   * @returns {Promise<{valid: boolean, error?: string, userId?: *, email?: *, creditsRemaining?: *, planId?: *}>}
   */
  async validateApiKey(apiKey) {
    try {
      const response = await fetch(`${this.apiEndpoint}/api/v1/auth/validate`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-API-Key': apiKey
        }
      });

      if (!response.ok) {
        // Error bodies are not guaranteed to be JSON (e.g. a gateway error
        // page); a parse failure must not be reported as a connection error.
        let body = null;
        try {
          body = await response.json();
        } catch {
          return {
            valid: false,
            error: `Request failed with status ${response.status}`
          };
        }
        return {
          valid: false,
          error: (body && body.message) || 'Invalid API key'
        };
      }

      const data = await response.json();
      return {
        valid: true,
        userId: data.userId,
        email: data.email,
        creditsRemaining: data.creditsRemaining,
        planId: data.planId
      };
    } catch (error) {
      return {
        valid: false,
        error: `Connection error: ${error.message}`
      };
    }
  }

  /**
   * Check if the user has enough credits for a tool.
   * Fails open: when the backend cannot be reached or answers with an error,
   * the operation is allowed and the problem is logged.
   * @param {number} [estimatedCredits=1]
   * @returns {Promise<boolean>}
   * @throws {Error} if no configuration is loaded (outside creator mode).
   */
  async checkCredits(estimatedCredits = 1) {
    if (this.isCreatorMode()) {
      return true;
    }

    if (!this.config) {
      throw new Error('CrawlForge not configured. Run setup first.');
    }

    // Use cached credits if the last backend check is recent enough.
    const now = Date.now();
    if (this.lastCreditCheck && (now - this.lastCreditCheck) < this.CREDIT_CHECK_INTERVAL) {
      const cached = this.creditCache.get(this.config.userId);
      if (cached !== undefined) {
        return cached >= estimatedCredits;
      }
    }

    try {
      const response = await fetch(`${this.apiEndpoint}/api/v1/credits`, {
        headers: {
          'X-API-Key': this.config.apiKey
        }
      });

      if (response.ok) {
        const data = await response.json();
        this.creditCache.set(this.config.userId, data.creditsRemaining);
        this.lastCreditCheck = now;
        return data.creditsRemaining >= estimatedCredits;
      }

      // Still fail open, but do not do so silently.
      console.error(`Failed to check credits: HTTP ${response.status}`);
    } catch (error) {
      console.error('Failed to check credits:', error.message);
    }

    return true; // Allow operation if can't verify
  }

  /**
   * Report usage to the backend for credit deduction. Never throws: usage
   * reporting must not break tool execution.
   * The local credit cache is only decremented when the backend accepted
   * the report.
   * @param {string} tool
   * @param {number} creditsUsed
   * @param {object} [requestData={}]
   * @param {number} [responseStatus=200]
   * @param {number} [processingTime=0]
   * @returns {Promise<void>}
   */
  async reportUsage(tool, creditsUsed, requestData = {}, responseStatus = 200, processingTime = 0) {
    if (this.isCreatorMode()) {
      return;
    }

    if (!this.config) {
      return; // Silently skip if not configured
    }

    try {
      const payload = {
        tool,
        creditsUsed,
        requestData,
        responseStatus,
        processingTime,
        timestamp: new Date().toISOString(),
        version: '3.0.0'
      };

      const response = await fetch(`${this.apiEndpoint}/api/v1/usage`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-API-Key': this.config.apiKey
        },
        body: JSON.stringify(payload)
      });

      if (!response.ok) {
        // A rejected report was not recorded; keep the cache untouched.
        console.error(`Failed to report usage: HTTP ${response.status}`);
        return;
      }

      const cached = this.creditCache.get(this.config.userId);
      if (cached !== undefined) {
        this.creditCache.set(this.config.userId, Math.max(0, cached - creditsUsed));
      }
    } catch (error) {
      console.error('Failed to report usage:', error.message);
    }
  }

  /**
   * Get the credit cost for a tool.
   * @param {string} tool - tool identifier, e.g. 'fetch_url'
   * @returns {number} the listed cost, or 1 for any tool not in the table.
   */
  getToolCost(tool) {
    const costs = {
      // Basic tools (1 credit)
      fetch_url: 1,
      extract_text: 1,
      extract_links: 1,
      extract_metadata: 1,

      // Advanced tools (2 credits)
      scrape_structured: 2,
      search_web: 2,
      summarize_content: 2,
      analyze_content: 2,

      // Premium tools (5-10 credits)
      crawl_deep: 5,
      map_site: 5,
      batch_scrape: 5,
      deep_research: 10,
      stealth_mode: 10,

      // Heavy processing (3-5 credits)
      process_document: 3,
      extract_content: 3,
      scrape_with_actions: 5,
      generate_llms_txt: 3,
      localization: 5
    };

    // Own-property check so names such as 'constructor' or 'toString' do not
    // resolve to inherited Object.prototype members.
    return Object.hasOwn(costs, tool) ? costs[tool] : 1;
  }

  /**
   * Check if authenticated: always true in creator mode, otherwise true when
   * a config with an `apiKey` is loaded.
   * @returns {boolean}
   */
  isAuthenticated() {
    if (this.isCreatorMode()) {
      return true;
    }
    return this.config !== null && this.config.apiKey !== undefined;
  }

  /**
   * Get the current configuration.
   * @returns {object|null} the loaded config object, or null when none is loaded.
   */
  getConfig() {
    return this.config;
  }

  /**
   * Clear the stored configuration, both on disk and in memory. Never throws.
   * A config file that is already absent counts as cleared; any other
   * filesystem error is logged and in-memory state is left untouched.
   * @returns {Promise<void>}
   */
  async clearConfig() {
    try {
      await fs.unlink(this.configPath);
    } catch (error) {
      if (error.code !== 'ENOENT') {
        console.error('Failed to clear configuration:', error.message);
        return;
      }
    }

    this.config = null;
    this.creditCache.clear();
    this.lastCreditCheck = null;
    console.log('Configuration cleared.');
  }
}
313
+
314
+ // Export a single AuthManager instance, constructed once when this module is first evaluated; every importer of the default export receives that same object.
315
+ export default new AuthManager();