crawlforge-mcp-server 4.2.9 → 4.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/StealthBrowserManager.js +3 -3
- package/src/core/WebhookDispatcher.js +1 -1
- package/src/core/crawlers/BFSCrawler.js +3 -3
- package/src/tools/advanced/ScrapeWithActionsTool.js +2 -2
- package/src/tools/extract/extractContent.js +1 -1
- package/src/tools/extract/processDocument.js +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlforge-mcp-server",
|
|
3
|
-
"version": "4.2.9",
|
|
3
|
+
"version": "4.2.11",
|
|
4
4
|
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with 23 web scraping, crawling, and content processing tools. Defaults to local Ollama for LLM extraction (no API key needed); OpenAI/Anthropic available as opt-in. v4.0 adds Markdown-first output, pre-built site templates, Camoufox stealth engine, and cost transparency.",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": {
|
|
@@ -1376,7 +1376,7 @@ export class StealthBrowserManager {
|
|
|
1376
1376
|
});
|
|
1377
1377
|
|
|
1378
1378
|
if (challengeDetected) {
|
|
1379
|
-
console.
|
|
1379
|
+
console.error('CloudFlare challenge detected, attempting bypass...');
|
|
1380
1380
|
|
|
1381
1381
|
// Simulate human behavior during challenge
|
|
1382
1382
|
if (this.humanBehaviorSimulator) {
|
|
@@ -1437,7 +1437,7 @@ export class StealthBrowserManager {
|
|
|
1437
1437
|
});
|
|
1438
1438
|
|
|
1439
1439
|
if (recaptchaDetected) {
|
|
1440
|
-
console.
|
|
1440
|
+
console.error('reCAPTCHA detected, implementing human behavior...');
|
|
1441
1441
|
|
|
1442
1442
|
// Simulate human inspection of the reCAPTCHA
|
|
1443
1443
|
if (this.humanBehaviorSimulator) {
|
|
@@ -1491,7 +1491,7 @@ export class StealthBrowserManager {
|
|
|
1491
1491
|
this.proxyManager.currentProxy = proxies[this.proxyManager.proxyIndex];
|
|
1492
1492
|
this.proxyManager.lastRotation = now;
|
|
1493
1493
|
|
|
1494
|
-
console.
|
|
1494
|
+
console.error('Rotated to proxy:', this.proxyManager.currentProxy);
|
|
1495
1495
|
}
|
|
1496
1496
|
|
|
1497
1497
|
return this.proxyManager.currentProxy;
|
|
@@ -74,7 +74,7 @@ export class WebhookDispatcher extends EventEmitter {
|
|
|
74
74
|
onRetry: (error, attempt, delay, context) => {
|
|
75
75
|
this.stats.retriedDeliveries++;
|
|
76
76
|
if (this.enableLogging) {
|
|
77
|
-
console.
|
|
77
|
+
console.error('Webhook retry ' + attempt + ' for ' + context.url + ' after ' + delay + 'ms: ' + error.message);
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
});
|
|
@@ -142,13 +142,13 @@ export class BFSCrawler {
|
|
|
142
142
|
});
|
|
143
143
|
|
|
144
144
|
if (!filterDecision.allowed) {
|
|
145
|
-
console.
|
|
145
|
+
console.error(`Domain filter blocks: ${normalizedUrl} - ${filterDecision.reason}`);
|
|
146
146
|
return;
|
|
147
147
|
}
|
|
148
148
|
|
|
149
149
|
// Backward compatibility: also check legacy patterns
|
|
150
150
|
if (!this.shouldCrawlUrl(normalizedUrl)) {
|
|
151
|
-
console.
|
|
151
|
+
console.error(`Legacy pattern blocks: ${normalizedUrl}`);
|
|
152
152
|
return;
|
|
153
153
|
}
|
|
154
154
|
|
|
@@ -156,7 +156,7 @@ export class BFSCrawler {
|
|
|
156
156
|
if (this.respectRobots && this.robotsChecker) {
|
|
157
157
|
const canFetch = await this.robotsChecker.canFetch(normalizedUrl);
|
|
158
158
|
if (!canFetch) {
|
|
159
|
-
console.
|
|
159
|
+
console.error(`Robots.txt blocks: ${normalizedUrl}`);
|
|
160
160
|
return;
|
|
161
161
|
}
|
|
162
162
|
}
|
|
@@ -253,7 +253,7 @@ export class ScrapeWithActionsTool extends EventEmitter {
|
|
|
253
253
|
const startTime = Date.now();
|
|
254
254
|
|
|
255
255
|
if (this.enableLogging) {
|
|
256
|
-
console.
|
|
256
|
+
console.error(`Starting scrape session ${sessionId} with ${validated.actions.length} actions on ${validated.url}`);
|
|
257
257
|
}
|
|
258
258
|
|
|
259
259
|
// Check concurrent sessions limit
|
|
@@ -734,7 +734,7 @@ export class ScrapeWithActionsTool extends EventEmitter {
|
|
|
734
734
|
|
|
735
735
|
log(level, message) {
|
|
736
736
|
if (this.enableLogging) {
|
|
737
|
-
console.
|
|
737
|
+
console.error(`[ScrapeWithActionsTool:${level.toUpperCase()}] ${message}`);
|
|
738
738
|
}
|
|
739
739
|
}
|
|
740
740
|
|
|
@@ -138,7 +138,7 @@ export class ExtractContentTool {
|
|
|
138
138
|
const shouldUseJavaScript = options.requiresJavaScript || await this.shouldUseJavaScript(url);
|
|
139
139
|
|
|
140
140
|
if (shouldUseJavaScript) {
|
|
141
|
-
console.
|
|
141
|
+
console.error('Using browser rendering for JavaScript content...');
|
|
142
142
|
const browserResult = await this.browserProcessor.processURL({
|
|
143
143
|
url,
|
|
144
144
|
options: {
|
|
@@ -250,7 +250,7 @@ export class ProcessDocumentTool {
|
|
|
250
250
|
const shouldUseJavaScript = options.requiresJavaScript || await this.shouldUseJavaScript(source);
|
|
251
251
|
|
|
252
252
|
if (shouldUseJavaScript) {
|
|
253
|
-
console.
|
|
253
|
+
console.error('Using browser rendering for JavaScript content...');
|
|
254
254
|
const browserResult = await this.browserProcessor.processURL({
|
|
255
255
|
url: source,
|
|
256
256
|
options: {
|