crawlforge-mcp-server 3.0.15 → 3.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +3 -3
- package/README.md +4 -2
- package/package.json +16 -7
- package/src/core/ResearchOrchestrator.js +12 -7
package/CLAUDE.md
CHANGED
|
@@ -60,7 +60,7 @@ These guidelines are working if: fewer unnecessary changes in diffs, fewer rewri
|
|
|
60
60
|
|
|
61
61
|
## Project Overview
|
|
62
62
|
|
|
63
|
-
CrawlForge MCP Server - A professional MCP (Model Context Protocol) server providing
|
|
63
|
+
CrawlForge MCP Server - A professional MCP (Model Context Protocol) server providing 20 web scraping, crawling, and content processing tools.
|
|
64
64
|
|
|
65
65
|
**Current Version:** 3.0.12
|
|
66
66
|
|
|
@@ -141,13 +141,13 @@ Tools are organized in subdirectories by category:
|
|
|
141
141
|
- `tracking/` - trackChanges
|
|
142
142
|
- `llmstxt/` - generateLLMsTxt
|
|
143
143
|
|
|
144
|
-
### Available MCP Tools (
|
|
144
|
+
### Available MCP Tools (20 total)
|
|
145
145
|
|
|
146
146
|
**Basic Tools (server.js inline):**
|
|
147
147
|
fetch_url, extract_text, extract_links, extract_metadata, scrape_structured
|
|
148
148
|
|
|
149
149
|
**Advanced Tools:**
|
|
150
|
-
search_web, crawl_deep, map_site, extract_content, process_document, summarize_content, analyze_content, batch_scrape, scrape_with_actions, deep_research, track_changes, generate_llms_txt, stealth_mode, localization
|
|
150
|
+
search_web, crawl_deep, map_site, extract_content, process_document, summarize_content, analyze_content, extract_structured, batch_scrape, scrape_with_actions, deep_research, track_changes, generate_llms_txt, stealth_mode, localization
|
|
151
151
|
|
|
152
152
|
### MCP Server Entry Point
|
|
153
153
|
|
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@ Professional web scraping and content extraction server implementing the Model C
|
|
|
9
9
|
|
|
10
10
|
## 🎯 Features
|
|
11
11
|
|
|
12
|
-
- **
|
|
12
|
+
- **20 Professional Tools**: Web scraping, deep research, stealth browsing, content analysis
|
|
13
13
|
- **Free Tier**: 1,000 credits to get started instantly
|
|
14
14
|
- **MCP Compatible**: Works with Claude, Cursor, and other MCP-enabled AI tools
|
|
15
15
|
- **Enterprise Ready**: Scale up with paid plans for production use
|
|
@@ -111,6 +111,8 @@ Restart Cursor to activate.
|
|
|
111
111
|
- `search_web` - Search the web using Google Search API
|
|
112
112
|
- `summarize_content` - Generate intelligent summaries
|
|
113
113
|
- `analyze_content` - Comprehensive content analysis
|
|
114
|
+
- `extract_structured` - LLM-powered schema-driven extraction
|
|
115
|
+
- `track_changes` - Monitor content changes over time
|
|
114
116
|
|
|
115
117
|
### Premium Tools (5-10 credits)
|
|
116
118
|
- `crawl_deep` - Deep crawl entire websites
|
|
@@ -136,7 +138,7 @@ Restart Cursor to activate.
|
|
|
136
138
|
| **Enterprise** | 250,000 | Large scale operations |
|
|
137
139
|
|
|
138
140
|
**All plans include:**
|
|
139
|
-
- Access to all
|
|
141
|
+
- Access to all 20 tools
|
|
140
142
|
- Credits never expire and roll over month-to-month
|
|
141
143
|
- API access and webhook notifications
|
|
142
144
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlforge-mcp-server",
|
|
3
|
-
"version": "3.0.
|
|
4
|
-
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with
|
|
3
|
+
"version": "3.0.17",
|
|
4
|
+
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with 20 comprehensive web scraping, crawling, and content processing tools.",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"crawlforge": "server.js",
|
|
@@ -90,15 +90,15 @@
|
|
|
90
90
|
],
|
|
91
91
|
"dependencies": {
|
|
92
92
|
"@googleapis/customsearch": "^5.0.1",
|
|
93
|
-
"@modelcontextprotocol/sdk": "^1.
|
|
93
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
94
94
|
"@mozilla/readability": "^0.6.0",
|
|
95
95
|
"cheerio": "^1.1.2",
|
|
96
96
|
"compromise": "^14.14.4",
|
|
97
97
|
"diff": "^8.0.2",
|
|
98
98
|
"dotenv": "^17.2.1",
|
|
99
99
|
"franc": "^6.2.0",
|
|
100
|
-
"isomorphic-dompurify": "^
|
|
101
|
-
"jsdom": "^
|
|
100
|
+
"isomorphic-dompurify": "^3.9.0",
|
|
101
|
+
"jsdom": "^29.0.2",
|
|
102
102
|
"lru-cache": "^11.1.0",
|
|
103
103
|
"node-cron": "^3.0.3",
|
|
104
104
|
"node-summarizer": "^1.0.7",
|
|
@@ -110,9 +110,18 @@
|
|
|
110
110
|
"zod": "^3.23.8"
|
|
111
111
|
},
|
|
112
112
|
"devDependencies": {
|
|
113
|
-
"@jest/globals": "^30.0
|
|
113
|
+
"@jest/globals": "^30.3.0",
|
|
114
114
|
"cross-env": "^10.0.0",
|
|
115
|
-
"jest": "^30.0
|
|
115
|
+
"jest": "^30.3.0",
|
|
116
116
|
"shx": "^0.4.0"
|
|
117
|
+
},
|
|
118
|
+
"overrides": {
|
|
119
|
+
"undici": "^7.24.0",
|
|
120
|
+
"underscore": "^1.13.8",
|
|
121
|
+
"qs": "^6.14.2",
|
|
122
|
+
"path-to-regexp": "^8.4.2",
|
|
123
|
+
"@hono/node-server": "^1.19.13",
|
|
124
|
+
"hono": "^4.12.4",
|
|
125
|
+
"dompurify": "^3.4.0"
|
|
117
126
|
}
|
|
118
127
|
}
|
package/src/core/ResearchOrchestrator.js
CHANGED
|
@@ -508,23 +508,28 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
508
508
|
|
|
509
509
|
if (contentData && contentData.content) {
|
|
510
510
|
this.metrics.contentExtracted++;
|
|
511
|
-
|
|
511
|
+
|
|
512
|
+
// Normalize content to string (extract_content returns {text: "..."}, fallback returns string)
|
|
513
|
+
const contentText = typeof contentData.content === 'string'
|
|
514
|
+
? contentData.content
|
|
515
|
+
: (contentData.content.text || JSON.stringify(contentData.content));
|
|
516
|
+
|
|
512
517
|
// Enhance source with extracted content
|
|
513
518
|
let enhancedSource = {
|
|
514
519
|
...source,
|
|
515
|
-
extractedContent:
|
|
520
|
+
extractedContent: contentText,
|
|
516
521
|
metadata: contentData.metadata,
|
|
517
522
|
structuredData: contentData.structuredData,
|
|
518
523
|
extractedAt: new Date().toISOString(),
|
|
519
|
-
wordCount:
|
|
520
|
-
readabilityScore: this.calculateReadabilityScore(
|
|
524
|
+
wordCount: contentText.split(' ').length,
|
|
525
|
+
readabilityScore: this.calculateReadabilityScore(contentText)
|
|
521
526
|
};
|
|
522
527
|
|
|
523
528
|
// LLM-powered relevance analysis
|
|
524
529
|
if (this.enableLLMFeatures && topic) {
|
|
525
530
|
try {
|
|
526
531
|
const relevanceAnalysis = await this.llmManager.analyzeRelevance(
|
|
527
|
-
|
|
532
|
+
contentText,
|
|
528
533
|
topic,
|
|
529
534
|
{ maxContentLength: 2000 }
|
|
530
535
|
);
|
|
@@ -546,11 +551,11 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
546
551
|
error: llmError.message
|
|
547
552
|
});
|
|
548
553
|
// Set default relevance score
|
|
549
|
-
enhancedSource.relevanceScore = this.calculateTraditionalRelevance(
|
|
554
|
+
enhancedSource.relevanceScore = this.calculateTraditionalRelevance(contentText, topic);
|
|
550
555
|
}
|
|
551
556
|
} else {
|
|
552
557
|
// Fallback relevance calculation
|
|
553
|
-
enhancedSource.relevanceScore = this.calculateTraditionalRelevance(
|
|
558
|
+
enhancedSource.relevanceScore = this.calculateTraditionalRelevance(contentText, topic);
|
|
554
559
|
}
|
|
555
560
|
|
|
556
561
|
this.researchState.extractedContent.set(source.link, enhancedSource);
|