crawlforge-mcp-server 3.0.0 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +186 -45
- package/README.md +19 -15
- package/package.json +9 -6
- package/server.js +13 -16
- package/setup.js +5 -5
- package/src/core/ActionExecutor.js +16 -1
- package/src/core/AuthManager.js +2 -2
- package/src/core/ChangeTracker.js +5 -963
- package/src/core/WebhookDispatcher.js +4 -0
package/CLAUDE.md
CHANGED
|
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|
|
4
4
|
|
|
5
5
|
## Project Overview
|
|
6
6
|
|
|
7
|
-
CrawlForge MCP Server - A professional MCP (Model Context Protocol) server implementation providing
|
|
7
|
+
CrawlForge MCP Server - A professional MCP (Model Context Protocol) server implementation providing 19 comprehensive web scraping, crawling, and content processing tools. Version 3.0 includes advanced content extraction, document processing, summarization, and analysis capabilities. Wave 2 adds asynchronous batch processing and browser automation features. Wave 3 introduces deep research orchestration, stealth scraping, localization, and change tracking.
|
|
8
8
|
|
|
9
9
|
## Development Commands
|
|
10
10
|
|
|
@@ -12,42 +12,26 @@ CrawlForge MCP Server - A professional MCP (Model Context Protocol) server imple
|
|
|
12
12
|
# Install dependencies
|
|
13
13
|
npm install
|
|
14
14
|
|
|
15
|
-
#
|
|
16
|
-
|
|
17
|
-
#
|
|
15
|
+
# Setup (required for first run)
|
|
16
|
+
npm run setup
|
|
17
|
+
# Or provide API key via environment:
|
|
18
|
+
export CRAWLFORGE_API_KEY="your_api_key_here"
|
|
18
19
|
|
|
19
|
-
# Run the server
|
|
20
|
+
# Run the server (production)
|
|
20
21
|
npm start
|
|
21
22
|
|
|
23
|
+
# Development mode with verbose logging
|
|
24
|
+
npm run dev
|
|
25
|
+
|
|
22
26
|
# Test MCP protocol compliance
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
#
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
#
|
|
30
|
-
|
|
31
|
-
npm run test:performance:quick # Quick performance tests
|
|
32
|
-
npm run test:load # Load testing
|
|
33
|
-
npm run test:memory # Memory usage tests
|
|
34
|
-
npm run test:benchmark # Component benchmarks
|
|
35
|
-
npm run test:integration # Integration tests
|
|
36
|
-
npm run test:security # Security test suite
|
|
37
|
-
npm run test:all # Run all tests
|
|
38
|
-
|
|
39
|
-
# Wave 2 Validation Tests
|
|
40
|
-
node tests/validation/test-wave2-runner.js # Test Wave 2 features
|
|
41
|
-
node tests/validation/test-batch-scrape.js # Test batch scraping
|
|
42
|
-
node tests/validation/test-scrape-with-actions.js # Test action scraping
|
|
43
|
-
node tests/integration/master-test-runner.js # Run master test suite
|
|
44
|
-
|
|
45
|
-
# Wave 3 Tests
|
|
46
|
-
npm run test:wave3 # Full Wave 3 validation
|
|
47
|
-
npm run test:wave3:quick # Quick Wave 3 tests
|
|
48
|
-
npm run test:wave3:verbose # Verbose Wave 3 output
|
|
49
|
-
npm run test:unit:wave3 # Wave 3 unit tests (Jest)
|
|
50
|
-
npm run test:integration:wave3 # Wave 3 integration tests
|
|
27
|
+
npm test
|
|
28
|
+
|
|
29
|
+
# Functional tests
|
|
30
|
+
node test-tools.js # Test all tools (basic, Wave 2, Wave 3)
|
|
31
|
+
node test-real-world.js # Test real-world usage scenarios
|
|
32
|
+
|
|
33
|
+
# MCP Protocol tests
|
|
34
|
+
node tests/integration/mcp-protocol-compliance.test.js # MCP protocol compliance
|
|
51
35
|
|
|
52
36
|
# Docker commands
|
|
53
37
|
npm run docker:build # Build Docker image
|
|
@@ -71,18 +55,20 @@ npm run release:major # Major version bump
|
|
|
71
55
|
npm run clean # Remove cache, logs, test results
|
|
72
56
|
|
|
73
57
|
# Running specific test files
|
|
74
|
-
node tests/
|
|
75
|
-
node
|
|
76
|
-
node
|
|
58
|
+
node tests/integration/mcp-protocol-compliance.test.js # MCP protocol compliance
|
|
59
|
+
node test-tools.js # All tools functional test
|
|
60
|
+
node test-real-world.js # Real-world scenarios test
|
|
77
61
|
```
|
|
78
62
|
|
|
79
63
|
## High-Level Architecture
|
|
80
64
|
|
|
81
65
|
### Core Infrastructure (`src/core/`)
|
|
66
|
+
|
|
67
|
+
- **AuthManager**: Authentication, credit tracking, and usage reporting
|
|
82
68
|
- **PerformanceManager**: Centralized performance monitoring and optimization
|
|
83
69
|
- **JobManager**: Asynchronous job tracking and management for batch operations
|
|
84
70
|
- **WebhookDispatcher**: Event notification system for job completion callbacks
|
|
85
|
-
- **ActionExecutor**: Browser automation engine for complex interactions
|
|
71
|
+
- **ActionExecutor**: Browser automation engine for complex interactions (Playwright-based)
|
|
86
72
|
- **ResearchOrchestrator**: Coordinates multi-stage research with query expansion and synthesis
|
|
87
73
|
- **StealthBrowserManager**: Manages stealth mode scraping with anti-detection features
|
|
88
74
|
- **LocalizationManager**: Handles multi-language content and localization
|
|
@@ -90,27 +76,61 @@ node tests/security/security-test-suite.js # Security test suite
|
|
|
90
76
|
- **SnapshotManager**: Manages website snapshots and version history
|
|
91
77
|
|
|
92
78
|
### Tool Layer (`src/tools/`)
|
|
79
|
+
|
|
93
80
|
Tools are organized in subdirectories by category:
|
|
81
|
+
|
|
94
82
|
- `advanced/` - BatchScrapeTool, ScrapeWithActionsTool
|
|
95
83
|
- `crawl/` - crawlDeep, mapSite
|
|
96
84
|
- `extract/` - analyzeContent, extractContent, processDocument, summarizeContent
|
|
97
85
|
- `research/` - deepResearch
|
|
98
86
|
- `search/` - searchWeb and provider adapters (Google, DuckDuckGo)
|
|
99
87
|
- `tracking/` - trackChanges
|
|
88
|
+
- `llmstxt/` - generateLLMsTxt
|
|
89
|
+
|
|
90
|
+
### Available MCP Tools (19 total)
|
|
91
|
+
|
|
92
|
+
**Basic Tools (server.js inline):**
|
|
93
|
+
|
|
94
|
+
- fetch_url, extract_text, extract_links, extract_metadata, scrape_structured
|
|
95
|
+
|
|
96
|
+
**Advanced Tools:**
|
|
97
|
+
|
|
98
|
+
- search_web (conditional - requires search provider), crawl_deep, map_site
|
|
99
|
+
- extract_content, process_document, summarize_content, analyze_content
|
|
100
|
+
- batch_scrape, scrape_with_actions, deep_research
|
|
101
|
+
- track_changes, generate_llms_txt, stealth_mode, localization
|
|
100
102
|
|
|
101
103
|
### MCP Server Entry Point
|
|
104
|
+
|
|
102
105
|
The main server implementation is in `server.js` which:
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
|
|
107
|
+
1. **Authentication Flow**: Uses AuthManager for API key validation and credit tracking
|
|
108
|
+
- Checks for authentication on startup
|
|
109
|
+
- Auto-setup if CRAWLFORGE_API_KEY environment variable is present
|
|
110
|
+
2. **Tool Registration**: All tools registered via `server.registerTool()` pattern
|
|
111
|
+
- Wrapped with `withAuth()` function for credit tracking and authentication
|
|
112
|
+
- Each tool has inline Zod schema for parameter validation
|
|
113
|
+
- Response format uses `content` array with text objects
|
|
114
|
+
3. **Transport**: Uses stdio transport for MCP protocol communication
|
|
115
|
+
4. **Graceful Shutdown**: Cleans up browser instances, job managers, and other resources
|
|
116
|
+
|
|
117
|
+
### Tool Credit System
|
|
118
|
+
|
|
119
|
+
Each tool wrapped with `withAuth(toolName, handler)`:
|
|
120
|
+
|
|
121
|
+
- Checks credits before execution
|
|
122
|
+
- Reports usage with credit deduction on success
|
|
123
|
+
- Charges half credits on error
|
|
124
|
+
- Returns credit error if insufficient balance
|
|
108
125
|
|
|
109
126
|
### Key Configuration
|
|
110
127
|
|
|
111
|
-
Critical environment variables
|
|
128
|
+
Critical environment variables defined in `src/constants/config.js`:
|
|
112
129
|
|
|
113
130
|
```bash
|
|
131
|
+
# Authentication (required)
|
|
132
|
+
CRAWLFORGE_API_KEY=your_api_key_here
|
|
133
|
+
|
|
114
134
|
# Search Provider (auto, google, duckduckgo)
|
|
115
135
|
SEARCH_PROVIDER=auto
|
|
116
136
|
|
|
@@ -130,9 +150,16 @@ MAX_PAGES_PER_CRAWL=100
|
|
|
130
150
|
RESPECT_ROBOTS_TXT=true
|
|
131
151
|
```
|
|
132
152
|
|
|
153
|
+
### Configuration Files
|
|
154
|
+
|
|
155
|
+
- `~/.crawlforge/config.json` - User authentication and API key storage
|
|
156
|
+
- `.env` - Environment variables for development
|
|
157
|
+
- `src/constants/config.js` - Central configuration with defaults and validation
|
|
158
|
+
|
|
133
159
|
## Common Development Tasks
|
|
134
160
|
|
|
135
161
|
### Running a Single Test
|
|
162
|
+
|
|
136
163
|
```bash
|
|
137
164
|
# Run a specific test file
|
|
138
165
|
node tests/unit/linkAnalyzer.test.js
|
|
@@ -145,6 +172,7 @@ npm run test:wave3:verbose
|
|
|
145
172
|
```
|
|
146
173
|
|
|
147
174
|
### Testing Tool Integration
|
|
175
|
+
|
|
148
176
|
```bash
|
|
149
177
|
# Test MCP protocol compliance
|
|
150
178
|
npm test
|
|
@@ -158,11 +186,24 @@ node tests/validation/wave3-validation.js
|
|
|
158
186
|
```
|
|
159
187
|
|
|
160
188
|
### Debugging Tips
|
|
161
|
-
|
|
189
|
+
|
|
190
|
+
- Server logs are written to console via Winston logger (stderr for status, stdout for MCP protocol)
|
|
162
191
|
- Set `NODE_ENV=development` for verbose logging
|
|
163
|
-
- Use `--expose-gc` flag for memory profiling
|
|
192
|
+
- Use `--expose-gc` flag for memory profiling: `node --expose-gc server.js`
|
|
164
193
|
- Check `cache/` directory for cached responses
|
|
165
194
|
- Review `logs/` directory for application logs
|
|
195
|
+
- Memory monitoring is automatically enabled in development mode (logs every 60s when usage exceeds 200MB)
|
|
196
|
+
|
|
197
|
+
### Adding New Tools
|
|
198
|
+
|
|
199
|
+
When adding a new tool to server.js:
|
|
200
|
+
|
|
201
|
+
1. Import the tool class from `src/tools/`
|
|
202
|
+
2. Instantiate the tool (with config if needed)
|
|
203
|
+
3. Register with `server.registerTool(name, { description, inputSchema }, withAuth(name, handler))`
|
|
204
|
+
4. Ensure tool implements `execute(params)` method
|
|
205
|
+
5. Add to cleanup array in gracefulShutdown if it has `destroy()` or `cleanup()` methods
|
|
206
|
+
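6. Update the `Tools available:` startup log in `runServer()` in server.js by adding the new tool name to the appropriate category string (e.g. `trackingTools`, `wave3Tools`)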
|
|
166
207
|
|
|
167
208
|
## CI/CD Security Integration
|
|
168
209
|
|
|
@@ -171,9 +212,11 @@ node tests/validation/wave3-validation.js
|
|
|
171
212
|
The project includes comprehensive security testing integrated into the CI/CD pipeline:
|
|
172
213
|
|
|
173
214
|
#### Main CI Pipeline (`.github/workflows/ci.yml`)
|
|
215
|
+
|
|
174
216
|
The CI pipeline runs on every PR and push to main/develop branches and includes:
|
|
175
217
|
|
|
176
218
|
**Security Test Suite:**
|
|
219
|
+
|
|
177
220
|
- SSRF Protection validation
|
|
178
221
|
- Input validation (XSS, SQL injection, command injection)
|
|
179
222
|
- Rate limiting functionality
|
|
@@ -181,42 +224,50 @@ The CI pipeline runs on every PR and push to main/develop branches and includes:
|
|
|
181
224
|
- Regex DoS vulnerability detection
|
|
182
225
|
|
|
183
226
|
**Dependency Security:**
|
|
227
|
+
|
|
184
228
|
- npm audit with JSON output and summary generation
|
|
185
229
|
- Vulnerability severity analysis (critical/high/moderate/low)
|
|
186
230
|
- License compliance checking
|
|
187
231
|
- Outdated package detection
|
|
188
232
|
|
|
189
233
|
**Static Code Analysis:**
|
|
234
|
+
|
|
190
235
|
- CodeQL security analysis with extended queries
|
|
191
236
|
- ESLint security rules for dangerous patterns
|
|
192
237
|
- Hardcoded secret detection
|
|
193
238
|
- Security file scanning
|
|
194
239
|
|
|
195
240
|
**Reporting & Artifacts:**
|
|
241
|
+
|
|
196
242
|
- Comprehensive security reports generated
|
|
197
243
|
- PR comments with security summaries
|
|
198
244
|
- Artifact upload for detailed analysis
|
|
199
245
|
- Build failure on critical vulnerabilities
|
|
200
246
|
|
|
201
247
|
#### Dedicated Security Workflow (`.github/workflows/security.yml`)
|
|
248
|
+
|
|
202
249
|
Daily scheduled comprehensive security scanning:
|
|
203
250
|
|
|
204
251
|
**Dependency Security Scan:**
|
|
252
|
+
|
|
205
253
|
- Full vulnerability audit with configurable severity levels
|
|
206
254
|
- License compliance verification
|
|
207
255
|
- Detailed vulnerability reporting
|
|
208
256
|
|
|
209
257
|
**Static Code Analysis:**
|
|
258
|
+
|
|
210
259
|
- Extended CodeQL analysis with security-focused queries
|
|
211
260
|
- ESLint security plugin integration
|
|
212
261
|
- Pattern-based secret detection
|
|
213
262
|
|
|
214
263
|
**Container Security:**
|
|
264
|
+
|
|
215
265
|
- Trivy vulnerability scanning
|
|
216
266
|
- SARIF report generation
|
|
217
267
|
- Container base image analysis
|
|
218
268
|
|
|
219
269
|
**Automated Issue Creation:**
|
|
270
|
+
|
|
220
271
|
- GitHub issues created for critical vulnerabilities
|
|
221
272
|
- Detailed security reports with remediation steps
|
|
222
273
|
- Configurable severity thresholds
|
|
@@ -224,11 +275,13 @@ Daily scheduled comprehensive security scanning:
|
|
|
224
275
|
### Security Thresholds and Policies
|
|
225
276
|
|
|
226
277
|
**Build Failure Conditions:**
|
|
278
|
+
|
|
227
279
|
- Any critical severity vulnerabilities
|
|
228
280
|
- More than 3 high severity vulnerabilities
|
|
229
281
|
- Security test suite failures
|
|
230
282
|
|
|
231
283
|
**Automated Actions:**
|
|
284
|
+
|
|
232
285
|
- Daily security scans at 2 AM UTC
|
|
233
286
|
- PR blocking for security failures
|
|
234
287
|
- Automatic security issue creation
|
|
@@ -257,6 +310,7 @@ node tests/security/security-test-suite.js
|
|
|
257
310
|
### Security Artifacts and Reports
|
|
258
311
|
|
|
259
312
|
**Generated Reports:**
|
|
313
|
+
|
|
260
314
|
- `SECURITY-REPORT.md`: Comprehensive security assessment
|
|
261
315
|
- `npm-audit.json`: Detailed vulnerability data
|
|
262
316
|
- `security-tests.log`: Test execution logs
|
|
@@ -264,6 +318,7 @@ node tests/security/security-test-suite.js
|
|
|
264
318
|
- `license-check.md`: License compliance report
|
|
265
319
|
|
|
266
320
|
**Artifact Retention:**
|
|
321
|
+
|
|
267
322
|
- CI security results: 30 days
|
|
268
323
|
- Comprehensive security reports: 90 days
|
|
269
324
|
- Critical vulnerability reports: Indefinite
|
|
@@ -283,18 +338,21 @@ gh workflow run security.yml \
|
|
|
283
338
|
```
|
|
284
339
|
|
|
285
340
|
**Available Options:**
|
|
341
|
+
|
|
286
342
|
- `scan_type`: all, dependencies, code-analysis, container-scan
|
|
287
343
|
- `severity_threshold`: low, moderate, high, critical
|
|
288
344
|
|
|
289
345
|
### Security Integration Best Practices
|
|
290
346
|
|
|
291
347
|
**For Contributors:**
|
|
348
|
+
|
|
292
349
|
1. Always run `npm run test:security` before submitting PRs
|
|
293
350
|
2. Address any security warnings in your code
|
|
294
351
|
3. Keep dependencies updated with `npm audit fix`
|
|
295
352
|
4. Review security artifacts when CI fails
|
|
296
353
|
|
|
297
354
|
**For Maintainers:**
|
|
355
|
+
|
|
298
356
|
1. Review security reports weekly
|
|
299
357
|
2. Respond to automated security issues promptly
|
|
300
358
|
3. Keep security thresholds updated
|
|
@@ -303,13 +361,96 @@ gh workflow run security.yml \
|
|
|
303
361
|
### Security Documentation
|
|
304
362
|
|
|
305
363
|
Comprehensive security documentation is available in:
|
|
364
|
+
|
|
306
365
|
- `.github/SECURITY.md` - Complete security policy and procedures
|
|
307
366
|
- Security workflow logs and artifacts
|
|
308
367
|
- Generated security reports in CI runs
|
|
309
368
|
|
|
310
369
|
The security integration ensures that:
|
|
370
|
+
|
|
311
371
|
- No critical vulnerabilities reach production
|
|
312
372
|
- Security issues are detected early in development
|
|
313
373
|
- Comprehensive audit trails are maintained
|
|
314
374
|
- Automated remediation guidance is provided
|
|
315
375
|
|
|
376
|
+
## Important Implementation Patterns
|
|
377
|
+
|
|
378
|
+
### Tool Structure
|
|
379
|
+
|
|
380
|
+
All tools follow a consistent class-based pattern:
|
|
381
|
+
|
|
382
|
+
```javascript
|
|
383
|
+
export class ToolName {
|
|
384
|
+
constructor(config) {
|
|
385
|
+
this.config = config;
|
|
386
|
+
// Initialize resources
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
async execute(params) {
|
|
390
|
+
// Validate params (Zod validation done in server.js)
|
|
391
|
+
// Execute tool logic
|
|
392
|
+
// Return structured result
|
|
393
|
+
return { success: true, data: {...} };
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
async destroy() {
|
|
397
|
+
// Cleanup resources (browsers, connections, etc.)
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
### Search Provider Architecture
|
|
403
|
+
|
|
404
|
+
Search providers implement a factory pattern:
|
|
405
|
+
|
|
406
|
+
- `searchProviderFactory.js` selects provider based on config
|
|
407
|
+
- Providers implement common interface: `search(query, options)`
|
|
408
|
+
- Auto-fallback: Google → DuckDuckGo if Google credentials missing
|
|
409
|
+
- Each provider in `src/tools/search/adapters/`
|
|
410
|
+
|
|
411
|
+
### Browser Management
|
|
412
|
+
|
|
413
|
+
- Playwright used for browser automation (ActionExecutor, ScrapeWithActionsTool)
|
|
414
|
+
- Stealth features in StealthBrowserManager
|
|
415
|
+
- Always cleanup browsers in error handlers
|
|
416
|
+
- Context isolation per operation for security
|
|
417
|
+
|
|
418
|
+
### Memory Management
|
|
419
|
+
|
|
420
|
+
Critical for long-running processes:
|
|
421
|
+
|
|
422
|
+
- Graceful shutdown handlers registered for SIGINT/SIGTERM
|
|
423
|
+
- All tools with heavy resources must implement `destroy()` or `cleanup()`
|
|
424
|
+
- Memory monitoring in development mode (server.js line 1955-1963)
|
|
425
|
+
- Force GC on shutdown if available
|
|
426
|
+
|
|
427
|
+
### Error Handling Pattern
|
|
428
|
+
|
|
429
|
+
```javascript
|
|
430
|
+
try {
|
|
431
|
+
const result = await tool.execute(params);
|
|
432
|
+
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
433
|
+
} catch (error) {
|
|
434
|
+
return {
|
|
435
|
+
content: [{ type: "text", text: `Operation failed: ${error.message}` }],
|
|
436
|
+
isError: true,
|
|
437
|
+
};
|
|
438
|
+
}
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
### Configuration Validation
|
|
442
|
+
|
|
443
|
+
- All config in `src/constants/config.js` with defaults
|
|
444
|
+
- `validateConfig()` checks required settings
|
|
445
|
+
- Environment variables parsed with fallbacks
|
|
446
|
+
- Config errors only fail in production (warnings in dev)
|
|
447
|
+
|
|
448
|
+
## 🎯 Project Management Rules
|
|
449
|
+
|
|
450
|
+
## 🎯 Project Management Rules
|
|
451
|
+
|
|
452
|
+
- Always have the project manager work with the appropriate sub-agents in parallel.
|
|
453
|
+
- The project manager must always be in charge and delegate tasks to the appropriate sub-agents, which work in parallel, each according to its strengths. When a sub-agent finishes, it notifies the project manager, who then updates the @PRODUCTION_READINESS.md file.
|
|
454
|
+
- Whenever a phase is completed, push all changes to GitHub.
|
|
455
|
+
- Put all documentation `.md` files into the docs folders to keep everything organized.
|
|
456
|
+
- Every time you finish a phase, run `npm run build` and fix all errors before pushing to GitHub.
|
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@ Professional web scraping and content extraction server implementing the Model C
|
|
|
9
9
|
|
|
10
10
|
## 🎯 Features
|
|
11
11
|
|
|
12
|
-
- **
|
|
12
|
+
- **19 Professional Tools**: Web scraping, deep research, stealth browsing, content analysis
|
|
13
13
|
- **Free Tier**: 1,000 credits to get started instantly
|
|
14
14
|
- **MCP Compatible**: Works with Claude, Cursor, and other MCP-enabled AI tools
|
|
15
15
|
- **Enterprise Ready**: Scale up with paid plans for production use
|
|
@@ -34,7 +34,7 @@ This will:
|
|
|
34
34
|
- Configure your credentials securely
|
|
35
35
|
- Verify your setup is working
|
|
36
36
|
|
|
37
|
-
**Don't have an API key?** Get one free at [https://crawlforge.
|
|
37
|
+
**Don't have an API key?** Get one free at [https://www.crawlforge.dev/signup](https://www.crawlforge.dev/signup)
|
|
38
38
|
|
|
39
39
|
### 3. Configure Your IDE
|
|
40
40
|
|
|
@@ -105,15 +105,19 @@ Or use the MCP plugin in Cursor settings.
|
|
|
105
105
|
|
|
106
106
|
## 💳 Pricing
|
|
107
107
|
|
|
108
|
-
| Plan | Credits/Month |
|
|
109
|
-
|
|
110
|
-
| **Free** | 1,000 |
|
|
111
|
-
| **
|
|
112
|
-
| **
|
|
113
|
-
| **
|
|
114
|
-
| **Enterprise** | Unlimited | Custom | Large scale operations |
|
|
108
|
+
| Plan | Credits/Month | Best For |
|
|
109
|
+
|------|---------------|----------|
|
|
110
|
+
| **Free** | 1,000 | Testing & personal projects |
|
|
111
|
+
| **Starter** | 5,000 | Small projects & development |
|
|
112
|
+
| **Professional** | 50,000 | Professional use & production |
|
|
113
|
+
| **Enterprise** | 250,000 | Large scale operations |
|
|
115
114
|
|
|
116
|
-
|
|
115
|
+
**All plans include:**
|
|
116
|
+
- Access to all 19 tools
|
|
117
|
+
- Credits never expire and roll over month-to-month
|
|
118
|
+
- API access and webhook notifications
|
|
119
|
+
|
|
120
|
+
[View full pricing](https://www.crawlforge.dev/pricing)
|
|
117
121
|
|
|
118
122
|
## 🔧 Advanced Configuration
|
|
119
123
|
|
|
@@ -124,7 +128,7 @@ Or use the MCP plugin in Cursor settings.
|
|
|
124
128
|
export CRAWLFORGE_API_KEY="sk_live_your_api_key_here"
|
|
125
129
|
|
|
126
130
|
# Optional: Custom API endpoint (for enterprise)
|
|
127
|
-
export CRAWLFORGE_API_URL="https://api.crawlforge.
|
|
131
|
+
export CRAWLFORGE_API_URL="https://api.crawlforge.dev"
|
|
128
132
|
```
|
|
129
133
|
|
|
130
134
|
### Manual Configuration
|
|
@@ -161,9 +165,9 @@ Once configured, use these tools in your AI assistant:
|
|
|
161
165
|
|
|
162
166
|
## 🆘 Support
|
|
163
167
|
|
|
164
|
-
- **Documentation**: [https://crawlforge.
|
|
165
|
-
- **Issues**: [GitHub Issues](https://github.com/crawlforge
|
|
166
|
-
- **Email**: support@crawlforge.
|
|
168
|
+
- **Documentation**: [https://www.crawlforge.dev/docs](https://www.crawlforge.dev/docs)
|
|
169
|
+
- **Issues**: [GitHub Issues](https://github.com/mysleekdesigns/crawlforge-mcp/issues)
|
|
170
|
+
- **Email**: support@crawlforge.dev
|
|
167
171
|
- **Discord**: [Join our community](https://discord.gg/crawlforge)
|
|
168
172
|
|
|
169
173
|
## 📄 License
|
|
@@ -178,4 +182,4 @@ Contributions are welcome! Please read our [Contributing Guide](CONTRIBUTING.md)
|
|
|
178
182
|
|
|
179
183
|
**Built with ❤️ by the CrawlForge team**
|
|
180
184
|
|
|
181
|
-
[Website](https://crawlforge.
|
|
185
|
+
[Website](https://www.crawlforge.dev) | [Documentation](https://www.crawlforge.dev/docs) | [API Reference](https://www.crawlforge.dev/api-reference)
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlforge-mcp-server",
|
|
3
|
-
"version": "3.0.
|
|
4
|
-
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with
|
|
3
|
+
"version": "3.0.2",
|
|
4
|
+
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with 19 comprehensive web scraping, crawling, and content processing tools.",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"crawlforge": "server.js",
|
|
@@ -12,6 +12,9 @@
|
|
|
12
12
|
"setup": "node setup.js",
|
|
13
13
|
"dev": "cross-env NODE_ENV=development node server.js",
|
|
14
14
|
"test": "node tests/integration/mcp-protocol-compliance.test.js",
|
|
15
|
+
"test:tools": "node test-tools.js",
|
|
16
|
+
"test:real-world": "node test-real-world.js",
|
|
17
|
+
"test:all": "bash run-all-tests.sh",
|
|
15
18
|
"postinstall": "echo '\n🎉 CrawlForge MCP Server installed!\n\nRun \"npx crawlforge-setup\" to configure your API key and get started.\n'",
|
|
16
19
|
"docker:build": "docker build -t crawlforge .",
|
|
17
20
|
"docker:dev": "docker-compose up crawlforge-dev",
|
|
@@ -45,17 +48,17 @@
|
|
|
45
48
|
],
|
|
46
49
|
"author": {
|
|
47
50
|
"name": "Simon Lacey",
|
|
48
|
-
"email": "
|
|
51
|
+
"email": "support@crawlforge.dev"
|
|
49
52
|
},
|
|
50
53
|
"license": "MIT",
|
|
51
54
|
"repository": {
|
|
52
55
|
"type": "git",
|
|
53
|
-
"url": "git+https://github.com/crawlforge
|
|
56
|
+
"url": "git+https://github.com/mysleekdesigns/crawlforge-mcp.git"
|
|
54
57
|
},
|
|
55
58
|
"bugs": {
|
|
56
|
-
"url": "https://github.com/crawlforge
|
|
59
|
+
"url": "https://github.com/mysleekdesigns/crawlforge-mcp/issues"
|
|
57
60
|
},
|
|
58
|
-
"homepage": "https://crawlforge.
|
|
61
|
+
"homepage": "https://crawlforge.dev",
|
|
59
62
|
"type": "module",
|
|
60
63
|
"engines": {
|
|
61
64
|
"node": ">=18.0.0",
|
package/server.js
CHANGED
|
@@ -16,8 +16,8 @@ import { BatchScrapeTool } from "./src/tools/advanced/BatchScrapeTool.js";
|
|
|
16
16
|
import { ScrapeWithActionsTool } from "./src/tools/advanced/ScrapeWithActionsTool.js";
|
|
17
17
|
// Deep Research Tool
|
|
18
18
|
import { DeepResearchTool } from "./src/tools/research/deepResearch.js";
|
|
19
|
-
// Change Tracking Tool
|
|
20
|
-
|
|
19
|
+
// Change Tracking Tool
|
|
20
|
+
import { TrackChangesTool } from "./src/tools/tracking/trackChanges.js";
|
|
21
21
|
// LLMs.txt Generator Tool (Phase 2.5)
|
|
22
22
|
import { GenerateLLMsTxtTool } from "./src/tools/llmstxt/generateLLMsTxt.js";
|
|
23
23
|
// Wave 3-4 Core Managers
|
|
@@ -62,7 +62,7 @@ if (!AuthManager.isAuthenticated() && !AuthManager.isCreatorMode()) {
|
|
|
62
62
|
console.log('Or set your API key via environment variable:');
|
|
63
63
|
console.log(' export CRAWLFORGE_API_KEY="your_api_key_here"');
|
|
64
64
|
console.log('');
|
|
65
|
-
console.log('Get your free API key at: https://crawlforge.
|
|
65
|
+
console.log('Get your free API key at: https://www.crawlforge.dev/signup');
|
|
66
66
|
console.log('(Includes 1,000 free credits!)');
|
|
67
67
|
console.log('');
|
|
68
68
|
process.exit(0);
|
|
@@ -77,7 +77,7 @@ if (configErrors.length > 0 && config.server.nodeEnv === 'production') {
|
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
// Create the server
|
|
80
|
-
const server = new McpServer({ name: "crawlforge", version: "3.0.
|
|
80
|
+
const server = new McpServer({ name: "crawlforge", version: "3.0.1" });
|
|
81
81
|
|
|
82
82
|
// Helper function to wrap tool handlers with authentication and credit tracking
|
|
83
83
|
function withAuth(toolName, handler) {
|
|
@@ -97,7 +97,7 @@ function withAuth(toolName, handler) {
|
|
|
97
97
|
type: "text",
|
|
98
98
|
text: JSON.stringify({
|
|
99
99
|
error: "Insufficient credits",
|
|
100
|
-
message: `This operation requires ${creditCost} credits. Please upgrade your plan at https://crawlforge.
|
|
100
|
+
message: `This operation requires ${creditCost} credits. Please upgrade your plan at https://www.crawlforge.dev/pricing`,
|
|
101
101
|
creditsRequired: creditCost
|
|
102
102
|
}, null, 2)
|
|
103
103
|
}]
|
|
@@ -161,8 +161,8 @@ const scrapeWithActionsTool = new ScrapeWithActionsTool();
|
|
|
161
161
|
// Initialize Deep Research Tool
|
|
162
162
|
const deepResearchTool = new DeepResearchTool();
|
|
163
163
|
|
|
164
|
-
// Initialize Change Tracking Tool
|
|
165
|
-
|
|
164
|
+
// Initialize Change Tracking Tool
|
|
165
|
+
const trackChangesTool = new TrackChangesTool();
|
|
166
166
|
|
|
167
167
|
// Initialize LLMs.txt Generator Tool (Phase 2.5)
|
|
168
168
|
const generateLLMsTxtTool = new GenerateLLMsTxtTool();
|
|
@@ -1407,8 +1407,6 @@ server.registerTool("deep_research", {
|
|
|
1407
1407
|
}));
|
|
1408
1408
|
|
|
1409
1409
|
// Tool: track_changes - Enhanced Content change tracking with baseline capture and monitoring (Phase 2.4)
|
|
1410
|
-
// Temporarily disabled due to import issue
|
|
1411
|
-
/*
|
|
1412
1410
|
server.registerTool("track_changes", {
|
|
1413
1411
|
description: "Enhanced content change tracking with baseline capture, comparison, scheduled monitoring, advanced comparison engine, alert system, and historical analysis",
|
|
1414
1412
|
inputSchema: {
|
|
@@ -1512,8 +1510,8 @@ server.registerTool("track_changes", {
|
|
|
1512
1510
|
includeTrends: z.boolean().default(true),
|
|
1513
1511
|
includeMonitorStatus: z.boolean().default(true)
|
|
1514
1512
|
}).optional()
|
|
1515
|
-
}
|
|
1516
|
-
}, async (params) => {
|
|
1513
|
+
}
|
|
1514
|
+
}, withAuth("track_changes", async (params) => {
|
|
1517
1515
|
try {
|
|
1518
1516
|
const result = await trackChangesTool.execute(params);
|
|
1519
1517
|
return {
|
|
@@ -1531,7 +1529,7 @@ server.registerTool("track_changes", {
|
|
|
1531
1529
|
isError: true
|
|
1532
1530
|
};
|
|
1533
1531
|
}
|
|
1534
|
-
});
|
|
1532
|
+
}));
|
|
1535
1533
|
|
|
1536
1534
|
// Tool: generate_llms_txt - Generate LLMs.txt and LLMs-full.txt files (Phase 2.5)
|
|
1537
1535
|
server.registerTool("generate_llms_txt", {
|
|
@@ -1575,8 +1573,7 @@ server.registerTool("generate_llms_txt", {
|
|
|
1575
1573
|
isError: true
|
|
1576
1574
|
};
|
|
1577
1575
|
}
|
|
1578
|
-
});
|
|
1579
|
-
*/
|
|
1576
|
+
}));
|
|
1580
1577
|
|
|
1581
1578
|
// Tool: stealth_mode - Advanced anti-detection browser management (Wave 3)
|
|
1582
1579
|
server.registerTool("stealth_mode", {
|
|
@@ -1854,7 +1851,7 @@ async function runServer() {
|
|
|
1854
1851
|
const phase3Tools = ', extract_content, process_document, summarize_content, analyze_content';
|
|
1855
1852
|
const wave2Tools = ', batch_scrape, scrape_with_actions';
|
|
1856
1853
|
const researchTools = ', deep_research';
|
|
1857
|
-
const trackingTools = '';
|
|
1854
|
+
const trackingTools = ', track_changes';
|
|
1858
1855
|
const llmsTxtTools = ', generate_llms_txt';
|
|
1859
1856
|
const wave3Tools = ', stealth_mode, localization';
|
|
1860
1857
|
console.error(`Tools available: ${baseTools}${searchTool}${phase3Tools}${wave2Tools}${researchTools}${trackingTools}${llmsTxtTools}${wave3Tools}`);
|
|
@@ -1890,7 +1887,7 @@ async function gracefulShutdown(signal) {
|
|
|
1890
1887
|
batchScrapeTool,
|
|
1891
1888
|
scrapeWithActionsTool,
|
|
1892
1889
|
deepResearchTool,
|
|
1893
|
-
|
|
1890
|
+
trackChangesTool,
|
|
1894
1891
|
generateLLMsTxtTool,
|
|
1895
1892
|
stealthBrowserManager,
|
|
1896
1893
|
localizationManager
|
package/setup.js
CHANGED
|
@@ -29,7 +29,7 @@ async function main() {
|
|
|
29
29
|
console.log(' • An internet connection');
|
|
30
30
|
console.log('');
|
|
31
31
|
console.log('Don\'t have an API key yet?');
|
|
32
|
-
console.log('Get one free at: https://crawlforge.
|
|
32
|
+
console.log('Get one free at: https://www.crawlforge.dev/signup');
|
|
33
33
|
console.log('(Includes 1,000 free credits to get started!)');
|
|
34
34
|
console.log('');
|
|
35
35
|
console.log('────────────────────────────────────────────────────────');
|
|
@@ -57,7 +57,7 @@ async function main() {
|
|
|
57
57
|
if (!apiKey || !apiKey.trim()) {
|
|
58
58
|
console.log('');
|
|
59
59
|
console.log('❌ API key is required');
|
|
60
|
-
console.log('Get your free API key at: https://crawlforge.
|
|
60
|
+
console.log('Get your free API key at: https://www.crawlforge.dev/signup');
|
|
61
61
|
rl.close();
|
|
62
62
|
process.exit(1);
|
|
63
63
|
}
|
|
@@ -78,15 +78,15 @@ async function main() {
|
|
|
78
78
|
console.log(' npm start # Start the MCP server');
|
|
79
79
|
console.log(' npm run test # Test your setup');
|
|
80
80
|
console.log('');
|
|
81
|
-
console.log('Need help? Visit: https://crawlforge.
|
|
81
|
+
console.log('Need help? Visit: https://www.crawlforge.dev/docs');
|
|
82
82
|
console.log('');
|
|
83
83
|
} else {
|
|
84
84
|
console.log('');
|
|
85
85
|
console.log('Setup failed. Please check your API key and try again.');
|
|
86
86
|
console.log('');
|
|
87
87
|
console.log('Need help?');
|
|
88
|
-
console.log(' • Documentation: https://crawlforge.
|
|
89
|
-
console.log(' • Support: support@crawlforge.
|
|
88
|
+
console.log(' • Documentation: https://www.crawlforge.dev/docs');
|
|
89
|
+
console.log(' • Support: support@crawlforge.dev');
|
|
90
90
|
console.log('');
|
|
91
91
|
rl.close();
|
|
92
92
|
process.exit(1);
|