crawlforge-mcp-server 3.0.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +63 -19
- package/README.md +19 -15
- package/package.json +6 -6
- package/server.js +13 -16
- package/setup.js +5 -5
- package/src/core/ActionExecutor.js +16 -1
- package/src/core/AuthManager.js +2 -2
- package/src/core/ChangeTracker.js +5 -963
- package/src/core/WebhookDispatcher.js +4 -0
package/CLAUDE.md
CHANGED
|
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|
|
4
4
|
|
|
5
5
|
## Project Overview
|
|
6
6
|
|
|
7
|
-
CrawlForge MCP Server - A professional MCP (Model Context Protocol) server implementation providing
|
|
7
|
+
CrawlForge MCP Server - A professional MCP (Model Context Protocol) server implementation providing 19 comprehensive web scraping, crawling, and content processing tools. Version 3.0 includes advanced content extraction, document processing, summarization, and analysis capabilities. Wave 2 adds asynchronous batch processing and browser automation features. Wave 3 introduces deep research orchestration, stealth scraping, localization, and change tracking.
|
|
8
8
|
|
|
9
9
|
## Development Commands
|
|
10
10
|
|
|
@@ -12,15 +12,11 @@ CrawlForge MCP Server - A professional MCP (Model Context Protocol) server imple
|
|
|
12
12
|
# Install dependencies
|
|
13
13
|
npm install
|
|
14
14
|
|
|
15
|
-
# Setup (required for first run
|
|
15
|
+
# Setup (required for first run)
|
|
16
16
|
npm run setup
|
|
17
17
|
# Or provide API key via environment:
|
|
18
18
|
export CRAWLFORGE_API_KEY="your_api_key_here"
|
|
19
19
|
|
|
20
|
-
# Creator Mode (bypass API key requirement for development)
|
|
21
|
-
export BYPASS_API_KEY=true
|
|
22
|
-
npm start
|
|
23
|
-
|
|
24
20
|
# Run the server (production)
|
|
25
21
|
npm start
|
|
26
22
|
|
|
@@ -67,7 +63,8 @@ node test-real-world.js # Real-world scenarios t
|
|
|
67
63
|
## High-Level Architecture
|
|
68
64
|
|
|
69
65
|
### Core Infrastructure (`src/core/`)
|
|
70
|
-
|
|
66
|
+
|
|
67
|
+
- **AuthManager**: Authentication, credit tracking, and usage reporting
|
|
71
68
|
- **PerformanceManager**: Centralized performance monitoring and optimization
|
|
72
69
|
- **JobManager**: Asynchronous job tracking and management for batch operations
|
|
73
70
|
- **WebhookDispatcher**: Event notification system for job completion callbacks
|
|
@@ -79,33 +76,37 @@ node test-real-world.js # Real-world scenarios t
|
|
|
79
76
|
- **SnapshotManager**: Manages website snapshots and version history
|
|
80
77
|
|
|
81
78
|
### Tool Layer (`src/tools/`)
|
|
79
|
+
|
|
82
80
|
Tools are organized in subdirectories by category:
|
|
81
|
+
|
|
83
82
|
- `advanced/` - BatchScrapeTool, ScrapeWithActionsTool
|
|
84
83
|
- `crawl/` - crawlDeep, mapSite
|
|
85
84
|
- `extract/` - analyzeContent, extractContent, processDocument, summarizeContent
|
|
86
85
|
- `research/` - deepResearch
|
|
87
86
|
- `search/` - searchWeb and provider adapters (Google, DuckDuckGo)
|
|
88
|
-
- `tracking/` - trackChanges
|
|
87
|
+
- `tracking/` - trackChanges
|
|
89
88
|
- `llmstxt/` - generateLLMsTxt
|
|
90
89
|
|
|
91
|
-
### Available MCP Tools (
|
|
90
|
+
### Available MCP Tools (19 total)
|
|
91
|
+
|
|
92
92
|
**Basic Tools (server.js inline):**
|
|
93
|
+
|
|
93
94
|
- fetch_url, extract_text, extract_links, extract_metadata, scrape_structured
|
|
94
95
|
|
|
95
96
|
**Advanced Tools:**
|
|
97
|
+
|
|
96
98
|
- search_web (conditional - requires search provider), crawl_deep, map_site
|
|
97
99
|
- extract_content, process_document, summarize_content, analyze_content
|
|
98
100
|
- batch_scrape, scrape_with_actions, deep_research
|
|
99
|
-
- generate_llms_txt, stealth_mode, localization
|
|
100
|
-
|
|
101
|
-
**Note:** track_changes tool is implemented but currently commented out in server.js (line 1409-1535)
|
|
101
|
+
- track_changes, generate_llms_txt, stealth_mode, localization
|
|
102
102
|
|
|
103
103
|
### MCP Server Entry Point
|
|
104
|
+
|
|
104
105
|
The main server implementation is in `server.js` which:
|
|
106
|
+
|
|
105
107
|
1. **Authentication Flow**: Uses AuthManager for API key validation and credit tracking
|
|
106
|
-
- Checks for authentication on startup
|
|
108
|
+
- Checks for authentication on startup
|
|
107
109
|
- Auto-setup if CRAWLFORGE_API_KEY environment variable is present
|
|
108
|
-
- Creator mode enabled via BYPASS_API_KEY=true
|
|
109
110
|
2. **Tool Registration**: All tools registered via `server.registerTool()` pattern
|
|
110
111
|
- Wrapped with `withAuth()` function for credit tracking and authentication
|
|
111
112
|
- Each tool has inline Zod schema for parameter validation
|
|
@@ -114,8 +115,10 @@ The main server implementation is in `server.js` which:
|
|
|
114
115
|
4. **Graceful Shutdown**: Cleans up browser instances, job managers, and other resources
|
|
115
116
|
|
|
116
117
|
### Tool Credit System
|
|
118
|
+
|
|
117
119
|
Each tool wrapped with `withAuth(toolName, handler)`:
|
|
118
|
-
|
|
120
|
+
|
|
121
|
+
- Checks credits before execution
|
|
119
122
|
- Reports usage with credit deduction on success
|
|
120
123
|
- Charges half credits on error
|
|
121
124
|
- Returns credit error if insufficient balance
|
|
@@ -125,9 +128,8 @@ Each tool wrapped with `withAuth(toolName, handler)`:
|
|
|
125
128
|
Critical environment variables defined in `src/constants/config.js`:
|
|
126
129
|
|
|
127
130
|
```bash
|
|
128
|
-
# Authentication (required
|
|
131
|
+
# Authentication (required)
|
|
129
132
|
CRAWLFORGE_API_KEY=your_api_key_here
|
|
130
|
-
BYPASS_API_KEY=true # Enable creator mode for development
|
|
131
133
|
|
|
132
134
|
# Search Provider (auto, google, duckduckgo)
|
|
133
135
|
SEARCH_PROVIDER=auto
|
|
@@ -149,6 +151,7 @@ RESPECT_ROBOTS_TXT=true
|
|
|
149
151
|
```
|
|
150
152
|
|
|
151
153
|
### Configuration Files
|
|
154
|
+
|
|
152
155
|
- `~/.crawlforge/config.json` - User authentication and API key storage
|
|
153
156
|
- `.env` - Environment variables for development
|
|
154
157
|
- `src/constants/config.js` - Central configuration with defaults and validation
|
|
@@ -156,6 +159,7 @@ RESPECT_ROBOTS_TXT=true
|
|
|
156
159
|
## Common Development Tasks
|
|
157
160
|
|
|
158
161
|
### Running a Single Test
|
|
162
|
+
|
|
159
163
|
```bash
|
|
160
164
|
# Run a specific test file
|
|
161
165
|
node tests/unit/linkAnalyzer.test.js
|
|
@@ -168,6 +172,7 @@ npm run test:wave3:verbose
|
|
|
168
172
|
```
|
|
169
173
|
|
|
170
174
|
### Testing Tool Integration
|
|
175
|
+
|
|
171
176
|
```bash
|
|
172
177
|
# Test MCP protocol compliance
|
|
173
178
|
npm test
|
|
@@ -181,16 +186,18 @@ node tests/validation/wave3-validation.js
|
|
|
181
186
|
```
|
|
182
187
|
|
|
183
188
|
### Debugging Tips
|
|
189
|
+
|
|
184
190
|
- Server logs are written to console via Winston logger (stderr for status, stdout for MCP protocol)
|
|
185
191
|
- Set `NODE_ENV=development` for verbose logging
|
|
186
192
|
- Use `--expose-gc` flag for memory profiling: `node --expose-gc server.js`
|
|
187
193
|
- Check `cache/` directory for cached responses
|
|
188
194
|
- Review `logs/` directory for application logs
|
|
189
|
-
- Use creator mode during development to bypass authentication: `BYPASS_API_KEY=true npm start`
|
|
190
195
|
- Memory monitoring automatically enabled in development mode (logs every 60s if >200MB)
|
|
191
196
|
|
|
192
197
|
### Adding New Tools
|
|
198
|
+
|
|
193
199
|
When adding a new tool to server.js:
|
|
200
|
+
|
|
194
201
|
1. Import the tool class from `src/tools/`
|
|
195
202
|
2. Instantiate the tool (with config if needed)
|
|
196
203
|
3. Register with `server.registerTool(name, { description, inputSchema }, withAuth(name, handler))`
|
|
@@ -205,9 +212,11 @@ When adding a new tool to server.js:
|
|
|
205
212
|
The project includes comprehensive security testing integrated into the CI/CD pipeline:
|
|
206
213
|
|
|
207
214
|
#### Main CI Pipeline (`.github/workflows/ci.yml`)
|
|
215
|
+
|
|
208
216
|
The CI pipeline runs on every PR and push to main/develop branches and includes:
|
|
209
217
|
|
|
210
218
|
**Security Test Suite:**
|
|
219
|
+
|
|
211
220
|
- SSRF Protection validation
|
|
212
221
|
- Input validation (XSS, SQL injection, command injection)
|
|
213
222
|
- Rate limiting functionality
|
|
@@ -215,42 +224,50 @@ The CI pipeline runs on every PR and push to main/develop branches and includes:
|
|
|
215
224
|
- Regex DoS vulnerability detection
|
|
216
225
|
|
|
217
226
|
**Dependency Security:**
|
|
227
|
+
|
|
218
228
|
- npm audit with JSON output and summary generation
|
|
219
229
|
- Vulnerability severity analysis (critical/high/moderate/low)
|
|
220
230
|
- License compliance checking
|
|
221
231
|
- Outdated package detection
|
|
222
232
|
|
|
223
233
|
**Static Code Analysis:**
|
|
234
|
+
|
|
224
235
|
- CodeQL security analysis with extended queries
|
|
225
236
|
- ESLint security rules for dangerous patterns
|
|
226
237
|
- Hardcoded secret detection
|
|
227
238
|
- Security file scanning
|
|
228
239
|
|
|
229
240
|
**Reporting & Artifacts:**
|
|
241
|
+
|
|
230
242
|
- Comprehensive security reports generated
|
|
231
243
|
- PR comments with security summaries
|
|
232
244
|
- Artifact upload for detailed analysis
|
|
233
245
|
- Build failure on critical vulnerabilities
|
|
234
246
|
|
|
235
247
|
#### Dedicated Security Workflow (`.github/workflows/security.yml`)
|
|
248
|
+
|
|
236
249
|
Daily scheduled comprehensive security scanning:
|
|
237
250
|
|
|
238
251
|
**Dependency Security Scan:**
|
|
252
|
+
|
|
239
253
|
- Full vulnerability audit with configurable severity levels
|
|
240
254
|
- License compliance verification
|
|
241
255
|
- Detailed vulnerability reporting
|
|
242
256
|
|
|
243
257
|
**Static Code Analysis:**
|
|
258
|
+
|
|
244
259
|
- Extended CodeQL analysis with security-focused queries
|
|
245
260
|
- ESLint security plugin integration
|
|
246
261
|
- Pattern-based secret detection
|
|
247
262
|
|
|
248
263
|
**Container Security:**
|
|
264
|
+
|
|
249
265
|
- Trivy vulnerability scanning
|
|
250
266
|
- SARIF report generation
|
|
251
267
|
- Container base image analysis
|
|
252
268
|
|
|
253
269
|
**Automated Issue Creation:**
|
|
270
|
+
|
|
254
271
|
- GitHub issues created for critical vulnerabilities
|
|
255
272
|
- Detailed security reports with remediation steps
|
|
256
273
|
- Configurable severity thresholds
|
|
@@ -258,11 +275,13 @@ Daily scheduled comprehensive security scanning:
|
|
|
258
275
|
### Security Thresholds and Policies
|
|
259
276
|
|
|
260
277
|
**Build Failure Conditions:**
|
|
278
|
+
|
|
261
279
|
- Any critical severity vulnerabilities
|
|
262
280
|
- More than 3 high severity vulnerabilities
|
|
263
281
|
- Security test suite failures
|
|
264
282
|
|
|
265
283
|
**Automated Actions:**
|
|
284
|
+
|
|
266
285
|
- Daily security scans at 2 AM UTC
|
|
267
286
|
- PR blocking for security failures
|
|
268
287
|
- Automatic security issue creation
|
|
@@ -291,6 +310,7 @@ node tests/security/security-test-suite.js
|
|
|
291
310
|
### Security Artifacts and Reports
|
|
292
311
|
|
|
293
312
|
**Generated Reports:**
|
|
313
|
+
|
|
294
314
|
- `SECURITY-REPORT.md`: Comprehensive security assessment
|
|
295
315
|
- `npm-audit.json`: Detailed vulnerability data
|
|
296
316
|
- `security-tests.log`: Test execution logs
|
|
@@ -298,6 +318,7 @@ node tests/security/security-test-suite.js
|
|
|
298
318
|
- `license-check.md`: License compliance report
|
|
299
319
|
|
|
300
320
|
**Artifact Retention:**
|
|
321
|
+
|
|
301
322
|
- CI security results: 30 days
|
|
302
323
|
- Comprehensive security reports: 90 days
|
|
303
324
|
- Critical vulnerability reports: Indefinite
|
|
@@ -317,18 +338,21 @@ gh workflow run security.yml \
|
|
|
317
338
|
```
|
|
318
339
|
|
|
319
340
|
**Available Options:**
|
|
341
|
+
|
|
320
342
|
- `scan_type`: all, dependencies, code-analysis, container-scan
|
|
321
343
|
- `severity_threshold`: low, moderate, high, critical
|
|
322
344
|
|
|
323
345
|
### Security Integration Best Practices
|
|
324
346
|
|
|
325
347
|
**For Contributors:**
|
|
348
|
+
|
|
326
349
|
1. Always run `npm run test:security` before submitting PRs
|
|
327
350
|
2. Address any security warnings in your code
|
|
328
351
|
3. Keep dependencies updated with `npm audit fix`
|
|
329
352
|
4. Review security artifacts when CI fails
|
|
330
353
|
|
|
331
354
|
**For Maintainers:**
|
|
355
|
+
|
|
332
356
|
1. Review security reports weekly
|
|
333
357
|
2. Respond to automated security issues promptly
|
|
334
358
|
3. Keep security thresholds updated
|
|
@@ -337,11 +361,13 @@ gh workflow run security.yml \
|
|
|
337
361
|
### Security Documentation
|
|
338
362
|
|
|
339
363
|
Comprehensive security documentation is available in:
|
|
364
|
+
|
|
340
365
|
- `.github/SECURITY.md` - Complete security policy and procedures
|
|
341
366
|
- Security workflow logs and artifacts
|
|
342
367
|
- Generated security reports in CI runs
|
|
343
368
|
|
|
344
369
|
The security integration ensures that:
|
|
370
|
+
|
|
345
371
|
- No critical vulnerabilities reach production
|
|
346
372
|
- Security issues are detected early in development
|
|
347
373
|
- Comprehensive audit trails are maintained
|
|
@@ -350,7 +376,9 @@ The security integration ensures that:
|
|
|
350
376
|
## Important Implementation Patterns
|
|
351
377
|
|
|
352
378
|
### Tool Structure
|
|
379
|
+
|
|
353
380
|
All tools follow a consistent class-based pattern:
|
|
381
|
+
|
|
354
382
|
```javascript
|
|
355
383
|
export class ToolName {
|
|
356
384
|
constructor(config) {
|
|
@@ -372,26 +400,32 @@ export class ToolName {
|
|
|
372
400
|
```
|
|
373
401
|
|
|
374
402
|
### Search Provider Architecture
|
|
403
|
+
|
|
375
404
|
Search providers implement a factory pattern:
|
|
405
|
+
|
|
376
406
|
- `searchProviderFactory.js` selects provider based on config
|
|
377
407
|
- Providers implement common interface: `search(query, options)`
|
|
378
408
|
- Auto-fallback: Google → DuckDuckGo if Google credentials missing
|
|
379
409
|
- Each provider in `src/tools/search/adapters/`
|
|
380
410
|
|
|
381
411
|
### Browser Management
|
|
412
|
+
|
|
382
413
|
- Playwright used for browser automation (ActionExecutor, ScrapeWithActionsTool)
|
|
383
414
|
- Stealth features in StealthBrowserManager
|
|
384
415
|
- Always cleanup browsers in error handlers
|
|
385
416
|
- Context isolation per operation for security
|
|
386
417
|
|
|
387
418
|
### Memory Management
|
|
419
|
+
|
|
388
420
|
Critical for long-running processes:
|
|
421
|
+
|
|
389
422
|
- Graceful shutdown handlers registered for SIGINT/SIGTERM
|
|
390
423
|
- All tools with heavy resources must implement `destroy()` or `cleanup()`
|
|
391
424
|
- Memory monitoring in development mode (server.js line 1955-1963)
|
|
392
425
|
- Force GC on shutdown if available
|
|
393
426
|
|
|
394
427
|
### Error Handling Pattern
|
|
428
|
+
|
|
395
429
|
```javascript
|
|
396
430
|
try {
|
|
397
431
|
const result = await tool.execute(params);
|
|
@@ -399,14 +433,24 @@ try {
|
|
|
399
433
|
} catch (error) {
|
|
400
434
|
return {
|
|
401
435
|
content: [{ type: "text", text: `Operation failed: ${error.message}` }],
|
|
402
|
-
isError: true
|
|
436
|
+
isError: true,
|
|
403
437
|
};
|
|
404
438
|
}
|
|
405
439
|
```
|
|
406
440
|
|
|
407
441
|
### Configuration Validation
|
|
442
|
+
|
|
408
443
|
- All config in `src/constants/config.js` with defaults
|
|
409
444
|
- `validateConfig()` checks required settings
|
|
410
445
|
- Environment variables parsed with fallbacks
|
|
411
446
|
- Config errors only fail in production (warnings in dev)
|
|
412
447
|
|
|
448
|
+
## 🎯 Project Management Rules
|
|
449
|
+
|
|
450
|
+
## 🎯 Project Management Rules
|
|
451
|
+
|
|
452
|
+
- Always have the project manager work with the appropriate sub-agents in parallel.
|
|
453
|
+
- The project manager must always be in charge and delegate tasks to the appropriate sub-agents to work on in parallel. Each sub-agent must work to its strengths. When a sub-agent is done, it notifies the project manager, and the project manager updates the @PRODUCTION_READINESS.md file.
|
|
454
|
+
- Whenever a phase is completed, push all changes to GitHub.
|
|
455
|
+
- Put all the documentation `.md` files into the docs folders to keep everything organized.
|
|
456
|
+
- Every time you finish a phase, run `npm run build` and fix all errors. Do this before you push to GitHub.
|
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@ Professional web scraping and content extraction server implementing the Model C
|
|
|
9
9
|
|
|
10
10
|
## 🎯 Features
|
|
11
11
|
|
|
12
|
-
- **
|
|
12
|
+
- **19 Professional Tools**: Web scraping, deep research, stealth browsing, content analysis
|
|
13
13
|
- **Free Tier**: 1,000 credits to get started instantly
|
|
14
14
|
- **MCP Compatible**: Works with Claude, Cursor, and other MCP-enabled AI tools
|
|
15
15
|
- **Enterprise Ready**: Scale up with paid plans for production use
|
|
@@ -34,7 +34,7 @@ This will:
|
|
|
34
34
|
- Configure your credentials securely
|
|
35
35
|
- Verify your setup is working
|
|
36
36
|
|
|
37
|
-
**Don't have an API key?** Get one free at [https://crawlforge.
|
|
37
|
+
**Don't have an API key?** Get one free at [https://www.crawlforge.dev/signup](https://www.crawlforge.dev/signup)
|
|
38
38
|
|
|
39
39
|
### 3. Configure Your IDE
|
|
40
40
|
|
|
@@ -105,15 +105,19 @@ Or use the MCP plugin in Cursor settings.
|
|
|
105
105
|
|
|
106
106
|
## 💳 Pricing
|
|
107
107
|
|
|
108
|
-
| Plan | Credits/Month |
|
|
109
|
-
|
|
110
|
-
| **Free** | 1,000 |
|
|
111
|
-
| **
|
|
112
|
-
| **
|
|
113
|
-
| **
|
|
114
|
-
| **Enterprise** | Unlimited | Custom | Large scale operations |
|
|
108
|
+
| Plan | Credits/Month | Best For |
|
|
109
|
+
|------|---------------|----------|
|
|
110
|
+
| **Free** | 1,000 | Testing & personal projects |
|
|
111
|
+
| **Starter** | 5,000 | Small projects & development |
|
|
112
|
+
| **Professional** | 50,000 | Professional use & production |
|
|
113
|
+
| **Enterprise** | 250,000 | Large scale operations |
|
|
115
114
|
|
|
116
|
-
|
|
115
|
+
**All plans include:**
|
|
116
|
+
- Access to all 19 tools
|
|
117
|
+
- Credits never expire and roll over month-to-month
|
|
118
|
+
- API access and webhook notifications
|
|
119
|
+
|
|
120
|
+
[View full pricing](https://www.crawlforge.dev/pricing)
|
|
117
121
|
|
|
118
122
|
## 🔧 Advanced Configuration
|
|
119
123
|
|
|
@@ -124,7 +128,7 @@ Or use the MCP plugin in Cursor settings.
|
|
|
124
128
|
export CRAWLFORGE_API_KEY="sk_live_your_api_key_here"
|
|
125
129
|
|
|
126
130
|
# Optional: Custom API endpoint (for enterprise)
|
|
127
|
-
export CRAWLFORGE_API_URL="https://api.crawlforge.
|
|
131
|
+
export CRAWLFORGE_API_URL="https://api.crawlforge.dev"
|
|
128
132
|
```
|
|
129
133
|
|
|
130
134
|
### Manual Configuration
|
|
@@ -161,9 +165,9 @@ Once configured, use these tools in your AI assistant:
|
|
|
161
165
|
|
|
162
166
|
## 🆘 Support
|
|
163
167
|
|
|
164
|
-
- **Documentation**: [https://crawlforge.
|
|
165
|
-
- **Issues**: [GitHub Issues](https://github.com/crawlforge
|
|
166
|
-
- **Email**: support@crawlforge.
|
|
168
|
+
- **Documentation**: [https://www.crawlforge.dev/docs](https://www.crawlforge.dev/docs)
|
|
169
|
+
- **Issues**: [GitHub Issues](https://github.com/mysleekdesigns/crawlforge-mcp/issues)
|
|
170
|
+
- **Email**: support@crawlforge.dev
|
|
167
171
|
- **Discord**: [Join our community](https://discord.gg/crawlforge)
|
|
168
172
|
|
|
169
173
|
## 📄 License
|
|
@@ -178,4 +182,4 @@ Contributions are welcome! Please read our [Contributing Guide](CONTRIBUTING.md)
|
|
|
178
182
|
|
|
179
183
|
**Built with ❤️ by the CrawlForge team**
|
|
180
184
|
|
|
181
|
-
[Website](https://crawlforge.
|
|
185
|
+
[Website](https://www.crawlforge.dev) | [Documentation](https://www.crawlforge.dev/docs) | [API Reference](https://www.crawlforge.dev/api-reference)
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlforge-mcp-server",
|
|
3
|
-
"version": "3.0.
|
|
4
|
-
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with
|
|
3
|
+
"version": "3.0.2",
|
|
4
|
+
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with 19 comprehensive web scraping, crawling, and content processing tools.",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"crawlforge": "server.js",
|
|
@@ -48,17 +48,17 @@
|
|
|
48
48
|
],
|
|
49
49
|
"author": {
|
|
50
50
|
"name": "Simon Lacey",
|
|
51
|
-
"email": "
|
|
51
|
+
"email": "support@crawlforge.dev"
|
|
52
52
|
},
|
|
53
53
|
"license": "MIT",
|
|
54
54
|
"repository": {
|
|
55
55
|
"type": "git",
|
|
56
|
-
"url": "git+https://github.com/crawlforge
|
|
56
|
+
"url": "git+https://github.com/mysleekdesigns/crawlforge-mcp.git"
|
|
57
57
|
},
|
|
58
58
|
"bugs": {
|
|
59
|
-
"url": "https://github.com/crawlforge
|
|
59
|
+
"url": "https://github.com/mysleekdesigns/crawlforge-mcp/issues"
|
|
60
60
|
},
|
|
61
|
-
"homepage": "https://crawlforge.
|
|
61
|
+
"homepage": "https://crawlforge.dev",
|
|
62
62
|
"type": "module",
|
|
63
63
|
"engines": {
|
|
64
64
|
"node": ">=18.0.0",
|
package/server.js
CHANGED
|
@@ -16,8 +16,8 @@ import { BatchScrapeTool } from "./src/tools/advanced/BatchScrapeTool.js";
|
|
|
16
16
|
import { ScrapeWithActionsTool } from "./src/tools/advanced/ScrapeWithActionsTool.js";
|
|
17
17
|
// Deep Research Tool
|
|
18
18
|
import { DeepResearchTool } from "./src/tools/research/deepResearch.js";
|
|
19
|
-
// Change Tracking Tool
|
|
20
|
-
|
|
19
|
+
// Change Tracking Tool
|
|
20
|
+
import { TrackChangesTool } from "./src/tools/tracking/trackChanges.js";
|
|
21
21
|
// LLMs.txt Generator Tool (Phase 2.5)
|
|
22
22
|
import { GenerateLLMsTxtTool } from "./src/tools/llmstxt/generateLLMsTxt.js";
|
|
23
23
|
// Wave 3-4 Core Managers
|
|
@@ -62,7 +62,7 @@ if (!AuthManager.isAuthenticated() && !AuthManager.isCreatorMode()) {
|
|
|
62
62
|
console.log('Or set your API key via environment variable:');
|
|
63
63
|
console.log(' export CRAWLFORGE_API_KEY="your_api_key_here"');
|
|
64
64
|
console.log('');
|
|
65
|
-
console.log('Get your free API key at: https://crawlforge.
|
|
65
|
+
console.log('Get your free API key at: https://www.crawlforge.dev/signup');
|
|
66
66
|
console.log('(Includes 1,000 free credits!)');
|
|
67
67
|
console.log('');
|
|
68
68
|
process.exit(0);
|
|
@@ -77,7 +77,7 @@ if (configErrors.length > 0 && config.server.nodeEnv === 'production') {
|
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
// Create the server
|
|
80
|
-
const server = new McpServer({ name: "crawlforge", version: "3.0.
|
|
80
|
+
const server = new McpServer({ name: "crawlforge", version: "3.0.1" });
|
|
81
81
|
|
|
82
82
|
// Helper function to wrap tool handlers with authentication and credit tracking
|
|
83
83
|
function withAuth(toolName, handler) {
|
|
@@ -97,7 +97,7 @@ function withAuth(toolName, handler) {
|
|
|
97
97
|
type: "text",
|
|
98
98
|
text: JSON.stringify({
|
|
99
99
|
error: "Insufficient credits",
|
|
100
|
-
message: `This operation requires ${creditCost} credits. Please upgrade your plan at https://crawlforge.
|
|
100
|
+
message: `This operation requires ${creditCost} credits. Please upgrade your plan at https://www.crawlforge.dev/pricing`,
|
|
101
101
|
creditsRequired: creditCost
|
|
102
102
|
}, null, 2)
|
|
103
103
|
}]
|
|
@@ -161,8 +161,8 @@ const scrapeWithActionsTool = new ScrapeWithActionsTool();
|
|
|
161
161
|
// Initialize Deep Research Tool
|
|
162
162
|
const deepResearchTool = new DeepResearchTool();
|
|
163
163
|
|
|
164
|
-
// Initialize Change Tracking Tool
|
|
165
|
-
|
|
164
|
+
// Initialize Change Tracking Tool
|
|
165
|
+
const trackChangesTool = new TrackChangesTool();
|
|
166
166
|
|
|
167
167
|
// Initialize LLMs.txt Generator Tool (Phase 2.5)
|
|
168
168
|
const generateLLMsTxtTool = new GenerateLLMsTxtTool();
|
|
@@ -1407,8 +1407,6 @@ server.registerTool("deep_research", {
|
|
|
1407
1407
|
}));
|
|
1408
1408
|
|
|
1409
1409
|
// Tool: track_changes - Enhanced Content change tracking with baseline capture and monitoring (Phase 2.4)
|
|
1410
|
-
// Temporarily disabled due to import issue
|
|
1411
|
-
/*
|
|
1412
1410
|
server.registerTool("track_changes", {
|
|
1413
1411
|
description: "Enhanced content change tracking with baseline capture, comparison, scheduled monitoring, advanced comparison engine, alert system, and historical analysis",
|
|
1414
1412
|
inputSchema: {
|
|
@@ -1512,8 +1510,8 @@ server.registerTool("track_changes", {
|
|
|
1512
1510
|
includeTrends: z.boolean().default(true),
|
|
1513
1511
|
includeMonitorStatus: z.boolean().default(true)
|
|
1514
1512
|
}).optional()
|
|
1515
|
-
}
|
|
1516
|
-
}, async (params) => {
|
|
1513
|
+
}
|
|
1514
|
+
}, withAuth("track_changes", async (params) => {
|
|
1517
1515
|
try {
|
|
1518
1516
|
const result = await trackChangesTool.execute(params);
|
|
1519
1517
|
return {
|
|
@@ -1531,7 +1529,7 @@ server.registerTool("track_changes", {
|
|
|
1531
1529
|
isError: true
|
|
1532
1530
|
};
|
|
1533
1531
|
}
|
|
1534
|
-
});
|
|
1532
|
+
}));
|
|
1535
1533
|
|
|
1536
1534
|
// Tool: generate_llms_txt - Generate LLMs.txt and LLMs-full.txt files (Phase 2.5)
|
|
1537
1535
|
server.registerTool("generate_llms_txt", {
|
|
@@ -1575,8 +1573,7 @@ server.registerTool("generate_llms_txt", {
|
|
|
1575
1573
|
isError: true
|
|
1576
1574
|
};
|
|
1577
1575
|
}
|
|
1578
|
-
});
|
|
1579
|
-
*/
|
|
1576
|
+
}));
|
|
1580
1577
|
|
|
1581
1578
|
// Tool: stealth_mode - Advanced anti-detection browser management (Wave 3)
|
|
1582
1579
|
server.registerTool("stealth_mode", {
|
|
@@ -1854,7 +1851,7 @@ async function runServer() {
|
|
|
1854
1851
|
const phase3Tools = ', extract_content, process_document, summarize_content, analyze_content';
|
|
1855
1852
|
const wave2Tools = ', batch_scrape, scrape_with_actions';
|
|
1856
1853
|
const researchTools = ', deep_research';
|
|
1857
|
-
const trackingTools = '';
|
|
1854
|
+
const trackingTools = ', track_changes';
|
|
1858
1855
|
const llmsTxtTools = ', generate_llms_txt';
|
|
1859
1856
|
const wave3Tools = ', stealth_mode, localization';
|
|
1860
1857
|
console.error(`Tools available: ${baseTools}${searchTool}${phase3Tools}${wave2Tools}${researchTools}${trackingTools}${llmsTxtTools}${wave3Tools}`);
|
|
@@ -1890,7 +1887,7 @@ async function gracefulShutdown(signal) {
|
|
|
1890
1887
|
batchScrapeTool,
|
|
1891
1888
|
scrapeWithActionsTool,
|
|
1892
1889
|
deepResearchTool,
|
|
1893
|
-
|
|
1890
|
+
trackChangesTool,
|
|
1894
1891
|
generateLLMsTxtTool,
|
|
1895
1892
|
stealthBrowserManager,
|
|
1896
1893
|
localizationManager
|
package/setup.js
CHANGED
|
@@ -29,7 +29,7 @@ async function main() {
|
|
|
29
29
|
console.log(' • An internet connection');
|
|
30
30
|
console.log('');
|
|
31
31
|
console.log('Don\'t have an API key yet?');
|
|
32
|
-
console.log('Get one free at: https://crawlforge.
|
|
32
|
+
console.log('Get one free at: https://www.crawlforge.dev/signup');
|
|
33
33
|
console.log('(Includes 1,000 free credits to get started!)');
|
|
34
34
|
console.log('');
|
|
35
35
|
console.log('────────────────────────────────────────────────────────');
|
|
@@ -57,7 +57,7 @@ async function main() {
|
|
|
57
57
|
if (!apiKey || !apiKey.trim()) {
|
|
58
58
|
console.log('');
|
|
59
59
|
console.log('❌ API key is required');
|
|
60
|
-
console.log('Get your free API key at: https://crawlforge.
|
|
60
|
+
console.log('Get your free API key at: https://www.crawlforge.dev/signup');
|
|
61
61
|
rl.close();
|
|
62
62
|
process.exit(1);
|
|
63
63
|
}
|
|
@@ -78,15 +78,15 @@ async function main() {
|
|
|
78
78
|
console.log(' npm start # Start the MCP server');
|
|
79
79
|
console.log(' npm run test # Test your setup');
|
|
80
80
|
console.log('');
|
|
81
|
-
console.log('Need help? Visit: https://crawlforge.
|
|
81
|
+
console.log('Need help? Visit: https://www.crawlforge.dev/docs');
|
|
82
82
|
console.log('');
|
|
83
83
|
} else {
|
|
84
84
|
console.log('');
|
|
85
85
|
console.log('Setup failed. Please check your API key and try again.');
|
|
86
86
|
console.log('');
|
|
87
87
|
console.log('Need help?');
|
|
88
|
-
console.log(' • Documentation: https://crawlforge.
|
|
89
|
-
console.log(' • Support: support@crawlforge.
|
|
88
|
+
console.log(' • Documentation: https://www.crawlforge.dev/docs');
|
|
89
|
+
console.log(' • Support: support@crawlforge.dev');
|
|
90
90
|
console.log('');
|
|
91
91
|
rl.close();
|
|
92
92
|
process.exit(1);
|
package/src/core/ActionExecutor.js
CHANGED
|
@@ -704,7 +704,23 @@ export class ActionExecutor extends EventEmitter {
|
|
|
704
704
|
* @param {Object} action - JavaScript action
|
|
705
705
|
* @returns {Promise<Object>} JavaScript result
|
|
706
706
|
*/
|
|
707
|
+
|
|
707
708
|
async executeJavaScriptAction(page, action) {
|
|
709
|
+
// SECURITY: JavaScript execution is disabled by default for security
|
|
710
|
+
// Set ALLOW_JAVASCRIPT_EXECUTION=true to enable (NOT recommended in production)
|
|
711
|
+
const allowJsExecution = process.env.ALLOW_JAVASCRIPT_EXECUTION === 'true';
|
|
712
|
+
|
|
713
|
+
if (!allowJsExecution) {
|
|
714
|
+
throw new Error(
|
|
715
|
+
'JavaScript execution is disabled for security reasons. ' +
|
|
716
|
+
'Set ALLOW_JAVASCRIPT_EXECUTION=true environment variable to enable (NOT recommended in production). ' +
|
|
717
|
+
'This feature allows arbitrary code execution and should only be used in trusted environments.'
|
|
718
|
+
);
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
// Log security warning when JS execution is enabled
|
|
722
|
+
console.warn('⚠️ SECURITY WARNING: JavaScript execution is enabled. This allows arbitrary code execution!');
|
|
723
|
+
|
|
708
724
|
const result = await page.evaluate(
|
|
709
725
|
new Function('...args', action.script),
|
|
710
726
|
...action.args
|
|
@@ -716,7 +732,6 @@ export class ActionExecutor extends EventEmitter {
|
|
|
716
732
|
result: action.returnResult ? result : undefined
|
|
717
733
|
};
|
|
718
734
|
}
|
|
719
|
-
|
|
720
735
|
/**
|
|
721
736
|
* Capture screenshot
|
|
722
737
|
* @param {Page} page - Playwright page
|
package/src/core/AuthManager.js
CHANGED
|
@@ -9,7 +9,7 @@ import path from 'path';
|
|
|
9
9
|
|
|
10
10
|
class AuthManager {
|
|
11
11
|
constructor() {
|
|
12
|
-
this.apiEndpoint = process.env.CRAWLFORGE_API_URL || 'https://api.crawlforge.
|
|
12
|
+
this.apiEndpoint = process.env.CRAWLFORGE_API_URL || 'https://api.crawlforge.dev';
|
|
13
13
|
this.configPath = path.join(process.env.HOME || process.env.USERPROFILE, '.crawlforge', 'config.json');
|
|
14
14
|
this.config = null;
|
|
15
15
|
this.creditCache = new Map();
|
|
@@ -95,7 +95,7 @@ class AuthManager {
|
|
|
95
95
|
|
|
96
96
|
if (!apiKey) {
|
|
97
97
|
console.log('❌ API key is required for setup');
|
|
98
|
-
console.log('Get your API key from: https://crawlforge.
|
|
98
|
+
console.log('Get your API key from: https://www.crawlforge.dev/dashboard/api-keys');
|
|
99
99
|
return false;
|
|
100
100
|
}
|
|
101
101
|
|