crawlforge-mcp-server 4.7.0 → 4.7.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -199,7 +199,7 @@ For the full canonical capabilities reference (all tools, CLI commands, stealth
199
199
  | **Business** ($399) | 250,000 | Large scale operations |
200
200
 
201
201
  **All plans include:**
202
- - Access to all 26 tools (the 15 local tools never consume credits)
202
+ - Access to all 26 tools
203
203
  - Credits never expire and roll over month-to-month
204
204
  - API access and webhook notifications
205
205
 
@@ -298,7 +298,7 @@ Once configured, use these tools in your AI assistant:
298
298
 
299
299
  ## 🔒 Security & Privacy
300
300
 
301
- - **Secure Authentication**: API keys required for all metered premium tools (the 15 free local tools run without one)
301
+ - **Secure Authentication**: API keys required for all metered tools
302
302
  - **Local Storage**: API keys stored securely at `~/.crawlforge/config.json`
303
303
  - **HTTPS Only**: All connections use encrypted HTTPS
304
304
  - **No Data Retention**: We don't store scraped data, only usage logs
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlforge-mcp-server",
3
- "version": "4.7.0",
3
+ "version": "4.7.1",
4
4
  "mcpName": "io.github.mysleekdesigns/crawlforge-mcp-server",
5
5
  "description": "CrawlForge MCP Server - Professional Model Context Protocol server with 26 web scraping, crawling, deep-research, and autonomous-extraction tools. Returns clean Markdown and structured JSON for Claude, Cursor, and any MCP client. Defaults to local Ollama for LLM extraction (no API key needed); OpenAI/Anthropic available as opt-in. Includes a unified multi-format scrape tool, an autonomous agent, pre-built site templates, and Camoufox stealth browsing.",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -89,7 +89,7 @@ if (configErrors.length > 0 && config.server.nodeEnv === 'production') {
89
89
  // Create the server
90
90
  const server = new McpServer({
91
91
  name: "crawlforge",
92
- version: "4.7.0",
92
+ version: "4.7.1",
93
93
  description: "Production-ready MCP server with 26 web scraping, crawling, and content processing tools. Features MCP Resources (crawlforge://), Prompts, Sampling fallback, Elicitation, stealth browsing, deep research, structured extraction, change tracking, local-LLM extraction via Ollama, unified multi-format scrape, and autonomous agent tool.",
94
94
  homepage: "https://www.crawlforge.dev",
95
95
  icon: "https://www.crawlforge.dev/icon.png"
@@ -50,7 +50,16 @@ export class LLMsTxtAnalyzer {
50
50
  apis: [],
51
51
  contentTypes: {},
52
52
  securityAreas: [],
53
- rateLimit: {},
53
+ // Conservative defaults so output never renders `undefined` when live
54
+ // rate-limit probing is skipped (analyzeRateLimiting only runs with
55
+ // probeRateLimit:true). Overwritten with measured values when probed.
56
+ rateLimit: {
57
+ recommendedDelay: 1000,
58
+ maxConcurrency: 5,
59
+ recommendedRPM: 30,
60
+ reasoning: 'Conservative defaults applied; live rate-limit probing was not performed (pass probeRateLimit:true to measure actual response times).',
61
+ averageResponseTime: null
62
+ },
54
63
  guidelines: {},
55
64
  metadata: {},
56
65
  errors: []
@@ -32,6 +32,7 @@ export class ResearchOrchestrator extends EventEmitter {
32
32
  concurrency = 5,
33
33
  enableSourceVerification = true,
34
34
  enableConflictDetection = true,
35
+ credibilityThreshold = 0.3,
35
36
  cacheEnabled = true,
36
37
  cacheTTL = 1800000, // 30 minutes
37
38
  researchApproach = 'broad',
@@ -61,6 +62,7 @@ export class ResearchOrchestrator extends EventEmitter {
61
62
  this.concurrency = Math.min(Math.max(1, concurrency), 20);
62
63
  this.enableSourceVerification = enableSourceVerification;
63
64
  this.enableConflictDetection = enableConflictDetection;
65
+ this.credibilityThreshold = Math.min(Math.max(0, credibilityThreshold), 1);
64
66
 
65
67
  // Stealth fallback config + lazy state (browser launched only on first block)
66
68
  this.enableStealthFallback = enableStealthFallback;
@@ -859,7 +861,7 @@ export class ResearchOrchestrator extends EventEmitter {
859
861
  }
860
862
 
861
863
  // Only include sources that meet minimum credibility threshold
862
- if (overallCredibility >= 0.3) {
864
+ if (overallCredibility >= this.credibilityThreshold) {
863
865
  verifiedSources.push({
864
866
  ...source,
865
867
  credibilityFactors,
@@ -1360,7 +1362,7 @@ export class ResearchOrchestrator extends EventEmitter {
1360
1362
 
1361
1363
  generateKeyFindings(claimGroups, sources) {
1362
1364
  return claimGroups
1363
- .filter(group => group.avgCredibility >= 0.3)
1365
+ .filter(group => group.avgCredibility >= this.credibilityThreshold)
1364
1366
  .sort((a, b) => b.consensusStrength - a.consensusStrength)
1365
1367
  .slice(0, 10)
1366
1368
  .map(group => ({
@@ -1373,7 +1375,7 @@ export class ResearchOrchestrator extends EventEmitter {
1373
1375
 
1374
1376
  compileSupportingEvidence(sources) {
1375
1377
  return sources
1376
- .filter(source => source.overallCredibility >= 0.3)
1378
+ .filter(source => source.overallCredibility >= this.credibilityThreshold)
1377
1379
  .map(source => ({
1378
1380
  title: source.title,
1379
1381
  url: source.link,
@@ -391,7 +391,9 @@ export class GenerateLLMsTxtTool {
391
391
  lines.push('');
392
392
  lines.push('### Technical Justification');
393
393
  lines.push(`${analysis.rateLimit.reasoning}`);
394
- lines.push(`Average response time: ${analysis.rateLimit.averageResponseTime}ms`);
394
+ if (analysis.rateLimit.averageResponseTime != null) {
395
+ lines.push(`Average response time: ${analysis.rateLimit.averageResponseTime}ms`);
396
+ }
395
397
  lines.push('');
396
398
  }
397
399
 
@@ -272,6 +272,10 @@ export class DeepResearchTool {
272
272
  maxUrls: params.maxUrls,
273
273
  timeLimit: params.timeLimit,
274
274
  concurrency: params.concurrency,
275
+ // Minimum credibility a source must clear in verifySourceCredibility.
276
+ // Must be on the orchestrator *constructor* config (not the
277
+ // conductResearch options) — that is the only place it is now read.
278
+ credibilityThreshold: params.credibilityThreshold,
275
279
  // The orchestrator tunes its query expansion to the approach (commercial
276
280
  // vs academic vs current-events); without this it always used academic
277
281
  // variations, which poisoned commercial/comparative searches.
@@ -356,7 +360,6 @@ export class DeepResearchTool {
356
360
  buildResearchOptions(params) {
357
361
  return {
358
362
  sourceTypes: params.sourceTypes,
359
- credibilityThreshold: params.credibilityThreshold,
360
363
  includeRecentOnly: params.includeRecentOnly,
361
364
  queryExpansion: params.queryExpansion,
362
365
  enableConflictDetection: params.enableConflictDetection,