crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Authentication and Credit Management for CrawlForge MCP Server
|
|
3
|
+
* Handles API key validation, credit tracking, and usage reporting
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import fetch from 'node-fetch';
import fs from 'fs/promises';
import os from 'os';
import path from 'path';
|
|
9
|
+
|
|
10
|
+
/**
 * Credit cost per tool name.
 *
 * Stored in a Map (not a plain object) so that tool names such as
 * "constructor" or "toString" cannot resolve to inherited Object.prototype
 * members and leak a non-numeric "cost".
 */
const TOOL_COSTS = new Map(Object.entries({
  // Basic tools (1 credit)
  fetch_url: 1,
  extract_text: 1,
  extract_links: 1,
  extract_metadata: 1,

  // Advanced tools (2 credits)
  scrape_structured: 2,
  search_web: 2,
  summarize_content: 2,
  analyze_content: 2,

  // Premium tools (5-10 credits)
  crawl_deep: 5,
  map_site: 5,
  batch_scrape: 5,
  deep_research: 10,
  stealth_mode: 10,

  // Document processing and interaction tools (3-5 credits)
  process_document: 3,
  extract_content: 3,
  scrape_with_actions: 5,
  generate_llms_txt: 3,
  localization: 5
}));

/** Cost charged for any tool that has no entry in TOOL_COSTS. */
const DEFAULT_TOOL_COST = 1;

/**
 * Handles API key storage and validation, credit checks, and usage reporting
 * against the CrawlForge backend.
 *
 * Configuration is persisted as JSON at `<home>/.crawlforge/config.json`.
 * When the CRAWLFORGE_CREATOR_MODE environment variable is exactly 'true',
 * credit checks and usage reporting are bypassed.
 */
class AuthManager {
  constructor() {
    this.apiEndpoint = process.env.CRAWLFORGE_API_URL || 'https://api.crawlforge.com';

    // Fall back to os.homedir() so construction does not throw when neither
    // HOME nor USERPROFILE is set (path.join rejects undefined segments).
    const homeDir = process.env.HOME || process.env.USERPROFILE || os.homedir();
    this.configPath = path.join(homeDir, '.crawlforge', 'config.json');

    this.config = null;
    this.creditCache = new Map(); // userId -> last known remaining credits
    this.lastCreditCheck = null; // ms timestamp of the last successful backend credit fetch
    this.CREDIT_CHECK_INTERVAL = 60000; // Check credits every minute max
    this.initialized = false;
    this.creatorMode = process.env.CRAWLFORGE_CREATOR_MODE === 'true';
  }

  /**
   * Check if running in creator mode (unlimited access, no API required).
   *
   * @returns {boolean} true when CRAWLFORGE_CREATOR_MODE was 'true' at construction time.
   */
  isCreatorMode() {
    return this.creatorMode;
  }

  /**
   * Initialize the auth manager and load stored config.
   *
   * Never throws: a missing or unreadable config leaves `this.config` as null
   * and is only reported on the console. Subsequent calls are no-ops.
   *
   * NOTE(review): the console.log calls below write to stdout; if this runs
   * under a stdio-based transport that may need to move to stderr - confirm.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initialized) return;

    // Skip config loading in creator mode
    if (this.isCreatorMode()) {
      console.log('🚀 Creator Mode Active - Unlimited Access Enabled');
      this.initialized = true;
      return;
    }

    try {
      await this.loadConfig();
    } catch (error) {
      if (error.code === 'ENOENT') {
        console.log('No existing CrawlForge configuration found. Run setup to configure.');
      } else {
        // The file exists but is unreadable, corrupt, or incomplete; say so
        // instead of claiming that no configuration was found.
        console.error(`Failed to load CrawlForge configuration: ${error.message}. Run setup to configure.`);
      }
    }

    this.initialized = true;
  }

  /**
   * Load configuration from disk into `this.config`.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the file cannot be read or parsed, or when it lacks
   *   `apiKey` or `userId`. `this.config` is reset to null before rethrowing.
   */
  async loadConfig() {
    try {
      const configData = await fs.readFile(this.configPath, 'utf-8');
      const parsed = JSON.parse(configData);

      // Validate config structure
      if (!parsed || !parsed.apiKey || !parsed.userId) {
        throw new Error('Invalid configuration');
      }

      this.config = parsed;
    } catch (error) {
      this.config = null;
      throw error;
    }
  }

  /**
   * Save configuration to disk and make it the active configuration.
   *
   * The file contains the API key, so the directory and file are created
   * with owner-only permissions.
   *
   * @param {string} apiKey - API key to persist.
   * @param {string} userId - User id associated with the key.
   * @param {string} email - Account email associated with the key.
   * @returns {Promise<void>}
   * @throws {Error} when the directory or file cannot be written.
   */
  async saveConfig(apiKey, userId, email) {
    const config = {
      apiKey,
      userId,
      email,
      createdAt: new Date().toISOString(),
      version: '1.0.0'
    };

    // Create config directory if it doesn't exist
    const configDir = path.dirname(this.configPath);
    await fs.mkdir(configDir, { recursive: true, mode: 0o700 });

    // Save config
    await fs.writeFile(this.configPath, JSON.stringify(config, null, 2), { mode: 0o600 });

    // The `mode` option only applies when the file is created; tighten a
    // pre-existing file too. Best effort: some filesystems reject chmod.
    try {
      await fs.chmod(this.configPath, 0o600);
    } catch (error) {
      console.error('Failed to restrict configuration file permissions:', error.message);
    }

    this.config = config;
  }

  /**
   * Setup wizard for first-time users: validates the key with the backend
   * and persists the resulting configuration.
   *
   * @param {string} apiKey - API key to validate and store.
   * @returns {Promise<boolean>} true when setup succeeded, false when the key
   *   was missing or rejected.
   * @throws {Error} when the configuration cannot be written to disk.
   */
  async runSetup(apiKey) {
    console.log('🔧 Setting up CrawlForge MCP Server...\n');

    if (!apiKey) {
      console.log('❌ API key is required for setup');
      console.log('Get your API key from: https://crawlforge.com/dashboard/api-keys');
      return false;
    }

    // Validate API key with backend
    const validation = await this.validateApiKey(apiKey);

    if (!validation.valid) {
      console.log(`❌ Invalid API key: ${validation.error}`);
      return false;
    }

    // Save configuration
    await this.saveConfig(apiKey, validation.userId, validation.email);

    console.log('✅ Setup complete!');
    console.log(`📧 Account: ${validation.email}`);
    console.log(`💳 Credits remaining: ${validation.creditsRemaining}`);
    console.log(`📦 Plan: ${validation.planId}`);

    return true;
  }

  /**
   * Validate API key with backend.
   *
   * Never throws; failures are reported through the returned object.
   *
   * @param {string} apiKey - API key to validate.
   * @returns {Promise<{valid: boolean, error?: string, userId?: *, email?: *, creditsRemaining?: *, planId?: *}>}
   *   On success `valid` is true and the account fields are copied from the
   *   response body; otherwise `valid` is false and `error` describes why.
   */
  async validateApiKey(apiKey) {
    try {
      const response = await fetch(`${this.apiEndpoint}/api/v1/auth/validate`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-API-Key': apiKey
        }
      });

      if (!response.ok) {
        // Error bodies are not guaranteed to be JSON (e.g. a proxy error
        // page). `undefined` marks a parse failure; JSON never yields it.
        const errorBody = await response.json().catch(() => undefined);
        if (errorBody === undefined) {
          return {
            valid: false,
            error: `Unexpected response from server (HTTP ${response.status})`
          };
        }
        return {
          valid: false,
          error: (errorBody && errorBody.message) || 'Invalid API key'
        };
      }

      const data = await response.json();
      return {
        valid: true,
        userId: data.userId,
        email: data.email,
        creditsRemaining: data.creditsRemaining,
        planId: data.planId
      };
    } catch (error) {
      return {
        valid: false,
        error: `Connection error: ${error.message}`
      };
    }
  }

  /**
   * Check if user has enough credits for a tool.
   *
   * Uses the cached balance when the last successful backend check is newer
   * than CREDIT_CHECK_INTERVAL; otherwise queries the backend. If the balance
   * cannot be verified the operation is allowed (fail-open).
   *
   * @param {number} [estimatedCredits=1] - Credits the operation is expected to cost.
   * @returns {Promise<boolean>} true when the operation may proceed.
   * @throws {Error} when no configuration is loaded (outside creator mode).
   */
  async checkCredits(estimatedCredits = 1) {
    // Creator mode has unlimited credits
    if (this.isCreatorMode()) {
      return true;
    }

    if (!this.config) {
      throw new Error('CrawlForge not configured. Run setup first.');
    }

    // Use cached credits if recent
    const now = Date.now();
    if (this.lastCreditCheck && (now - this.lastCreditCheck) < this.CREDIT_CHECK_INTERVAL) {
      const cached = this.creditCache.get(this.config.userId);
      if (cached !== undefined) {
        return cached >= estimatedCredits;
      }
    }

    // Fetch current credits from backend
    try {
      const response = await fetch(`${this.apiEndpoint}/api/v1/credits`, {
        headers: {
          'X-API-Key': this.config.apiKey
        }
      });

      if (response.ok) {
        const data = await response.json();
        this.creditCache.set(this.config.userId, data.creditsRemaining);
        this.lastCreditCheck = now;
        return data.creditsRemaining >= estimatedCredits;
      }

      // fetch() resolves on HTTP errors, so report them explicitly.
      console.error(`Failed to check credits: HTTP ${response.status}`);
    } catch (error) {
      // If can't check, allow operation but log error
      console.error('Failed to check credits:', error.message);
    }

    return true; // Allow operation if can't verify
  }

  /**
   * Report usage to backend for credit deduction.
   *
   * Never throws: usage reporting must not break tool execution. The cached
   * balance is only decremented when the backend accepted the report.
   *
   * @param {string} tool - Name of the tool that was executed.
   * @param {number} creditsUsed - Credits consumed by the call.
   * @param {object} [requestData={}] - Request details forwarded to the backend.
   * @param {number} [responseStatus=200] - Status of the tool's response.
   * @param {number} [processingTime=0] - Processing time forwarded to the backend.
   * @returns {Promise<void>}
   */
  async reportUsage(tool, creditsUsed, requestData = {}, responseStatus = 200, processingTime = 0) {
    // Skip usage reporting in creator mode
    if (this.isCreatorMode()) {
      return;
    }

    if (!this.config) {
      return; // Silently skip if not configured
    }

    try {
      const payload = {
        tool,
        creditsUsed,
        requestData,
        responseStatus,
        processingTime,
        timestamp: new Date().toISOString(),
        version: '3.0.0'
      };

      const response = await fetch(`${this.apiEndpoint}/api/v1/usage`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-API-Key': this.config.apiKey
        },
        body: JSON.stringify(payload)
      });

      if (!response.ok) {
        // fetch() does not reject on HTTP errors; without this check a
        // rejected report would go unnoticed and still reduce the cache.
        console.error(`Failed to report usage: HTTP ${response.status}`);
        return;
      }

      // Update cached credits
      const cached = this.creditCache.get(this.config.userId);
      if (cached !== undefined) {
        this.creditCache.set(this.config.userId, Math.max(0, cached - creditsUsed));
      }
    } catch (error) {
      // Log but don't throw - usage reporting should not break tool execution
      console.error('Failed to report usage:', error.message);
    }
  }

  /**
   * Get credit cost for a tool.
   *
   * @param {string} tool - Tool name.
   * @returns {number} The tool's cost, or 1 for names without an entry.
   */
  getToolCost(tool) {
    return TOOL_COSTS.has(tool) ? TOOL_COSTS.get(tool) : DEFAULT_TOOL_COST;
  }

  /**
   * Check if authenticated.
   *
   * @returns {boolean} true in creator mode, or when a configuration with an
   *   `apiKey` property is loaded.
   */
  isAuthenticated() {
    // Creator mode is always authenticated
    if (this.isCreatorMode()) {
      return true;
    }
    return this.config !== null && this.config.apiKey !== undefined;
  }

  /**
   * Get current configuration.
   *
   * @returns {object|null} The loaded configuration (including the API key),
   *   or null when none is loaded.
   */
  getConfig() {
    return this.config;
  }

  /**
   * Clear stored configuration, both on disk and in memory.
   *
   * Never throws. A config file that is already absent counts as cleared;
   * any other deletion failure is logged and leaves in-memory state intact.
   *
   * @returns {Promise<void>}
   */
  async clearConfig() {
    try {
      await fs.unlink(this.configPath);
    } catch (error) {
      if (error.code !== 'ENOENT') {
        console.error('Failed to clear configuration:', error.message);
        return;
      }
      // ENOENT: nothing on disk to remove; still reset in-memory state below.
    }

    this.config = null;
    this.creditCache.clear();
    this.lastCreditCheck = null;
    console.log('Configuration cleared.');
  }
}
|
|
313
|
+
|
|
314
|
+
// Export a singleton: the instance is constructed once, when this module is first evaluated, so every importer shares the same config and credit cache.
|
|
315
|
+
export default new AuthManager();
|