crawlforge-mcp-server 4.6.6 → 4.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -34
- package/package.json +1 -1
- package/server.js +8 -9
- package/src/core/AuthManager.js +44 -50
- package/src/core/LLMsTxtAnalyzer.js +10 -1
- package/src/core/ResearchOrchestrator.js +5 -3
- package/src/server/withAuth.js +5 -9
- package/src/tools/llmstxt/generateLLMsTxt.js +3 -1
- package/src/tools/research/deepResearch.js +4 -1
- package/src/tools/search/searchWeb.js +5 -4
package/README.md
CHANGED
|
@@ -67,9 +67,9 @@
|
|
|
67
67
|
npm install -g crawlforge-mcp-server
|
|
68
68
|
```
|
|
69
69
|
|
|
70
|
-
### 2. Setup Your API Key (
|
|
70
|
+
### 2. Setup Your API Key (required)
|
|
71
71
|
|
|
72
|
-
|
|
72
|
+
Every tool requires a CrawlForge API key — new accounts get 1,000 free trial credits to start:
|
|
73
73
|
|
|
74
74
|
```bash
|
|
75
75
|
npx crawlforge-setup
|
|
@@ -150,43 +150,38 @@ Restart Cursor to activate.
|
|
|
150
150
|
|
|
151
151
|
## 📊 Available Tools
|
|
152
152
|
|
|
153
|
-
CrawlForge
|
|
154
|
-
|
|
155
|
-
**
|
|
156
|
-
|
|
157
|
-
| Tool | What it does |
|
|
158
|
-
|------|--------------|
|
|
159
|
-
| `fetch_url` | Fetch content from any URL |
|
|
160
|
-
| `extract_text` | Extract clean text from web pages |
|
|
161
|
-
| `extract_links` | Get all links from a page |
|
|
162
|
-
| `extract_metadata` | Extract page metadata (title, OG tags, schema.org) |
|
|
163
|
-
| `scrape` | **Unified single-fetch, multi-format extraction.** Pass a `formats` array (markdown/html/rawHtml/text/links/metadata/screenshot/json-schema) plus `onlyMainContent`; one fetch serves every requested format with per-format partial-success warnings. *The `screenshot` format is the one metered exception (2 credits — needs a server browser)* |
|
|
164
|
-
| `scrape_structured` | Extract structured data with CSS selectors |
|
|
165
|
-
| `scrape_template` | Structured data from well-known sites (Amazon, GitHub, LinkedIn, YouTube, Reddit, Hacker News, npm, and more) without writing selectors |
|
|
166
|
-
| `extract_content` | Enhanced content extraction |
|
|
167
|
-
| `summarize_content` | Generate intelligent summaries |
|
|
168
|
-
| `analyze_content` | Comprehensive content analysis |
|
|
169
|
-
| `extract_structured` | LLM-powered schema-driven extraction (your own LLM key or local Ollama) |
|
|
170
|
-
| `extract_with_llm` | Natural-language extraction. **Defaults to a local Ollama model — no API key, no API costs.** Pass `provider: "openai" \| "anthropic"` with the matching key for cloud models |
|
|
171
|
-
| `process_document` | Multi-format document processing |
|
|
172
|
-
| `list_ollama_models` | List the Ollama models installed locally (helps you pick a `model` for `extract_with_llm`) |
|
|
173
|
-
| `get_batch_results` | Retrieve paginated results for a `batch_scrape` job by `batchId` |
|
|
174
|
-
|
|
175
|
-
**Metered Premium Tools** (3–10 credits, API key required)
|
|
153
|
+
CrawlForge requires an API key — **every tool is metered and consumes credits**. New accounts get **1,000 free trial credits** to start. Get a key at [crawlforge.dev/signup](https://www.crawlforge.dev/signup).
|
|
154
|
+
|
|
155
|
+
**All Tools** (API key required)
|
|
176
156
|
|
|
177
157
|
| Tool | Credits | What it does |
|
|
178
158
|
|------|---------|--------------|
|
|
179
|
-
| `
|
|
159
|
+
| `fetch_url` | 1 | Fetch content from any URL |
|
|
160
|
+
| `extract_text` | 1 | Extract clean text from web pages |
|
|
161
|
+
| `extract_links` | 1 | Get all links from a page |
|
|
162
|
+
| `extract_metadata` | 1 | Extract page metadata (title, OG tags, schema.org) |
|
|
163
|
+
| `scrape_template` | 1 | Structured data from well-known sites (Amazon, GitHub, LinkedIn, YouTube, Reddit, Hacker News, npm, and more) without writing selectors |
|
|
164
|
+
| `list_ollama_models` | 1 | List the Ollama models installed locally (helps you pick a `model` for `extract_with_llm`) |
|
|
165
|
+
| `get_batch_results` | 1 | Retrieve paginated results for a `batch_scrape` job by `batchId` |
|
|
166
|
+
| `scrape` | 2 | **Unified single-fetch, multi-format extraction.** Pass a `formats` array (markdown/html/rawHtml/text/links/metadata/screenshot/json-schema) plus `onlyMainContent`; one fetch serves every requested format with per-format partial-success warnings |
|
|
167
|
+
| `scrape_structured` | 2 | Extract structured data with CSS selectors |
|
|
168
|
+
| `extract_content` | 2 | Enhanced content extraction |
|
|
169
|
+
| `map_site` | 2 | Discover and map website structure (optional `search=` ranks the discovered URLs) |
|
|
170
|
+
| `process_document` | 2 | Multi-format document processing |
|
|
171
|
+
| `localization` | 2 | Multi-language and geo-location management |
|
|
180
172
|
| `track_changes` | 3 | Monitor content changes over time |
|
|
173
|
+
| `analyze_content` | 3 | Comprehensive content analysis |
|
|
174
|
+
| `extract_structured` | 3 | LLM-powered schema-driven extraction (your own LLM key or local Ollama) |
|
|
175
|
+
| `extract_with_llm` | 3 | Natural-language extraction. Defaults to a local Ollama model; pass `provider: "openai" \| "anthropic"` with the matching key for cloud models (external LLM billed by your provider) |
|
|
176
|
+
| `summarize_content` | 4 | Generate intelligent summaries |
|
|
177
|
+
| `crawl_deep` | 4 | Deep crawl entire websites |
|
|
181
178
|
| `search_web` | 5 | Search the web using Google Search API |
|
|
182
|
-
| `crawl_deep` | 5 | Deep crawl entire websites |
|
|
183
179
|
| `batch_scrape` | 5 | Process multiple URLs simultaneously |
|
|
184
180
|
| `scrape_with_actions` | 5 | Browser automation chains |
|
|
185
181
|
| `generate_llms_txt` | 5 | Generate AI interaction guidelines |
|
|
186
|
-
| `
|
|
182
|
+
| `stealth_mode` | 5 | Anti-detection browser management |
|
|
187
183
|
| `agent` | 8 | **Autonomous research/extraction from a natural-language prompt — no URLs required.** Plans, gathers, and shapes an answer under hard safety stops (max steps/URLs/wall-clock enforced by the orchestrator, never the LLM) |
|
|
188
184
|
| `deep_research` | 10 | Multi-stage research with source verification |
|
|
189
|
-
| `stealth_mode` | 10 | Anti-detection browser management |
|
|
190
185
|
|
|
191
186
|
For the full canonical capabilities reference (all tools, CLI commands, stealth engines, research workflow), see [SKILL.md](SKILL.md).
|
|
192
187
|
|
|
@@ -194,7 +189,7 @@ For the full canonical capabilities reference (all tools, CLI commands, stealth
|
|
|
194
189
|
|
|
195
190
|
## 💳 Pricing
|
|
196
191
|
|
|
197
|
-
**
|
|
192
|
+
**Every tool is metered and requires an API key.** New accounts get 1,000 free trial credits — no credit card required to start.
|
|
198
193
|
|
|
199
194
|
| Plan | Credits/Month | Best For |
|
|
200
195
|
|------|---------------|----------|
|
|
@@ -204,7 +199,7 @@ For the full canonical capabilities reference (all tools, CLI commands, stealth
|
|
|
204
199
|
| **Business** ($399) | 250,000 | Large scale operations |
|
|
205
200
|
|
|
206
201
|
**All plans include:**
|
|
207
|
-
- Access to all 26 tools
|
|
202
|
+
- Access to all 26 tools
|
|
208
203
|
- Credits never expire and roll over month-to-month
|
|
209
204
|
- API access and webhook notifications
|
|
210
205
|
|
|
@@ -238,7 +233,7 @@ export RESEARCH_MAX_STEALTH_RETRIES="8" # cap on stealth retries per research
|
|
|
238
233
|
|
|
239
234
|
### Local-LLM quickstart (`extract_with_llm` with Ollama)
|
|
240
235
|
|
|
241
|
-
`extract_with_llm` defaults to a local Ollama model — no
|
|
236
|
+
`extract_with_llm` defaults to a local Ollama model — no LLM-provider key, no per-token LLM costs, and no data leaving your machine (the CrawlForge credit cost still applies).
|
|
242
237
|
|
|
243
238
|
```bash
|
|
244
239
|
# 1. Install Ollama: https://ollama.com
|
|
@@ -303,7 +298,7 @@ Once configured, use these tools in your AI assistant:
|
|
|
303
298
|
|
|
304
299
|
## 🔒 Security & Privacy
|
|
305
300
|
|
|
306
|
-
- **Secure Authentication**: API keys required for all metered
|
|
301
|
+
- **Secure Authentication**: An API key is required for every tool (all tools are metered)
|
|
307
302
|
- **Local Storage**: API keys stored securely at `~/.crawlforge/config.json`
|
|
308
303
|
- **HTTPS Only**: All connections use encrypted HTTPS
|
|
309
304
|
- **No Data Retention**: We don't store scraped data, only usage logs
|
|
@@ -317,7 +312,7 @@ Once configured, use these tools in your AI assistant:
|
|
|
317
312
|
- **Action allowlist**: `scrape_with_actions` accepts only 7 action types (`wait`, `click`, `type`, `press`, `scroll`, `screenshot`, `executeJavaScript`). No download, file-write, or arbitrary cross-page navigation primitives exist.
|
|
318
313
|
- **JavaScript gate**: The `executeJavaScript` action throws by default. Set `ALLOW_JAVASCRIPT_EXECUTION=true` at deploy time to enable (not recommended in production).
|
|
319
314
|
- **MCP Elicitation** (v3.6.0): Four tools request user confirmation before executing expensive operations — `deep_research` (>50 URLs), `batch_scrape` (sync mode, >25 URLs), `crawl_deep` (projected >500 pages), `extract_structured` (schema has >3 required fields with no LLM configured). Credit-low situations also elicit. Confirmation is best-effort: if the MCP client does not support elicitation the tool proceeds (fail-open).
|
|
320
|
-
- **Per-tool credit gating**: Every tool is wrapped with `withAuth()
|
|
315
|
+
- **Per-tool credit gating**: Every tool is wrapped with `withAuth()` and is metered — credits are checked and deducted before execution, and a valid API key is required for every tool (fail-closed since v3.0.18).
|
|
321
316
|
|
|
322
317
|
See [docs/sandboxing-and-approvals.md](docs/sandboxing-and-approvals.md) for the full reference.
|
|
323
318
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlforge-mcp-server",
|
|
3
|
-
"version": "4.6.6",
|
|
3
|
+
"version": "4.7.1",
|
|
4
4
|
"mcpName": "io.github.mysleekdesigns/crawlforge-mcp-server",
|
|
5
5
|
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with 26 web scraping, crawling, deep-research, and autonomous-extraction tools. Returns clean Markdown and structured JSON for Claude, Cursor, and any MCP client. Defaults to local Ollama for LLM extraction (no LLM-provider API key needed); OpenAI/Anthropic available as opt-in. Includes a unified multi-format scrape tool, an autonomous agent, pre-built site templates, and Camoufox stealth browsing.",
|
|
6
6
|
"main": "server.js",
|
package/server.js
CHANGED
|
@@ -68,15 +68,14 @@ if (!AuthManager.isAuthenticated() && !AuthManager.isCreatorMode()) {
|
|
|
68
68
|
process.exit(1);
|
|
69
69
|
}
|
|
70
70
|
} else {
|
|
71
|
-
//
|
|
72
|
-
//
|
|
73
|
-
//
|
|
71
|
+
// Every tool is metered and requires an API key — there is no free tier.
|
|
72
|
+
// The server still starts so the MCP client can list tools, but every
|
|
73
|
+
// tool call errors with "not configured" until a key is set.
|
|
74
74
|
// Status → stderr; stdout is reserved for the MCP JSON-RPC stream.
|
|
75
|
-
console.error('
|
|
76
|
-
console.error('
|
|
77
|
-
console.error('
|
|
78
|
-
console.error('
|
|
79
|
-
console.error(' or set CRAWLFORGE_API_KEY.');
|
|
75
|
+
console.error('⚠️ No CrawlForge API key configured — all tools require a key.');
|
|
76
|
+
console.error(' Every tool (fetch_url, search_web, deep_research, …) is metered.');
|
|
77
|
+
console.error(' Get a key at https://www.crawlforge.dev/signup, then run `npm run setup`');
|
|
78
|
+
console.error(' or set CRAWLFORGE_API_KEY. Tool calls will error until a key is set.');
|
|
80
79
|
}
|
|
81
80
|
}
|
|
82
81
|
|
|
@@ -90,7 +89,7 @@ if (configErrors.length > 0 && config.server.nodeEnv === 'production') {
|
|
|
90
89
|
// Create the server
|
|
91
90
|
const server = new McpServer({
|
|
92
91
|
name: "crawlforge",
|
|
93
|
-
version: "4.
|
|
92
|
+
version: "4.7.1",
|
|
94
93
|
description: "Production-ready MCP server with 26 web scraping, crawling, and content processing tools. Features MCP Resources (crawlforge://), Prompts, Sampling fallback, Elicitation, stealth browsing, deep research, structured extraction, change tracking, local-LLM extraction via Ollama, unified multi-format scrape, and autonomous agent tool.",
|
|
95
94
|
homepage: "https://www.crawlforge.dev",
|
|
96
95
|
icon: "https://www.crawlforge.dev/icon.png"
|
package/src/core/AuthManager.js
CHANGED
|
@@ -239,11 +239,6 @@ class AuthManager {
|
|
|
239
239
|
return true;
|
|
240
240
|
}
|
|
241
241
|
|
|
242
|
-
// Open-core Phase 2: Tier-0 tools cost 0 and run without an API key
|
|
243
|
-
if (estimatedCredits === 0) {
|
|
244
|
-
return true;
|
|
245
|
-
}
|
|
246
|
-
|
|
247
242
|
if (!this.config) {
|
|
248
243
|
throw new Error('CrawlForge not configured. Run setup first.');
|
|
249
244
|
}
|
|
@@ -507,51 +502,53 @@ class AuthManager {
|
|
|
507
502
|
/**
|
|
508
503
|
* Get credit cost for a tool.
|
|
509
504
|
*
|
|
510
|
-
*
|
|
511
|
-
* truth shared with the backend
|
|
512
|
-
*
|
|
513
|
-
* are metered per COGS.
|
|
505
|
+
* Every tool is metered and requires an API key — there is no free tier.
|
|
506
|
+
* This table is the single source of truth shared with the backend
|
|
507
|
+
* (crawlforge-website/src/lib/credits.ts TOOL_CREDIT_COSTS).
|
|
514
508
|
*
|
|
515
509
|
* @param {string} tool
|
|
516
|
-
* @param {object} [params] — invocation params; only used for per-call
|
|
517
|
-
* exceptions (scrape's screenshot format needs a server browser).
|
|
518
510
|
*/
|
|
519
|
-
getToolCost(tool
|
|
520
|
-
// Tier-0 exception: the screenshot format of `scrape` is browser-backed
|
|
521
|
-
if (tool === 'scrape' && Array.isArray(params?.formats) && params.formats.includes('screenshot')) {
|
|
522
|
-
return 2;
|
|
523
|
-
}
|
|
524
|
-
|
|
511
|
+
getToolCost(tool) {
|
|
525
512
|
const costs = {
|
|
526
|
-
//
|
|
527
|
-
fetch_url:
|
|
528
|
-
extract_text:
|
|
529
|
-
extract_links:
|
|
530
|
-
extract_metadata:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
process_document:
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
//
|
|
544
|
-
map_site: 3,
|
|
513
|
+
// 1 credit
|
|
514
|
+
fetch_url: 1,
|
|
515
|
+
extract_text: 1,
|
|
516
|
+
extract_links: 1,
|
|
517
|
+
extract_metadata: 1,
|
|
518
|
+
scrape_template: 1,
|
|
519
|
+
list_ollama_models: 1,
|
|
520
|
+
get_batch_results: 1, // retrieval of an already-paid batch job
|
|
521
|
+
|
|
522
|
+
// 2 credits
|
|
523
|
+
scrape_structured: 2,
|
|
524
|
+
extract_content: 2,
|
|
525
|
+
map_site: 2,
|
|
526
|
+
process_document: 2,
|
|
527
|
+
localization: 2,
|
|
528
|
+
scrape: 2,
|
|
529
|
+
|
|
530
|
+
// 3 credits
|
|
545
531
|
track_changes: 3,
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
532
|
+
analyze_content: 3,
|
|
533
|
+
extract_structured: 3,
|
|
534
|
+
extract_with_llm: 3,
|
|
535
|
+
|
|
536
|
+
// 4 credits
|
|
537
|
+
summarize_content: 4,
|
|
538
|
+
crawl_deep: 4,
|
|
539
|
+
|
|
540
|
+
// 5 credits
|
|
541
|
+
stealth_mode: 5,
|
|
550
542
|
scrape_with_actions: 5,
|
|
551
|
-
|
|
543
|
+
batch_scrape: 5,
|
|
544
|
+
search_web: 5,
|
|
545
|
+
generate_llms_txt: 5,
|
|
546
|
+
|
|
547
|
+
// 8 credits
|
|
552
548
|
agent: 8, // projectCost() scales with maxUrls
|
|
553
|
-
|
|
554
|
-
|
|
549
|
+
|
|
550
|
+
// 10 credits
|
|
551
|
+
deep_research: 10
|
|
555
552
|
};
|
|
556
553
|
|
|
557
554
|
return costs[tool] ?? 1;
|
|
@@ -574,7 +571,7 @@ class AuthManager {
|
|
|
574
571
|
|
|
575
572
|
// Override for tools whose cost scales with params
|
|
576
573
|
let projected = base;
|
|
577
|
-
let note =
|
|
574
|
+
let note = 'Fixed cost per invocation.';
|
|
578
575
|
|
|
579
576
|
switch (toolName) {
|
|
580
577
|
case 'batch_scrape': {
|
|
@@ -596,14 +593,11 @@ class AuthManager {
|
|
|
596
593
|
break;
|
|
597
594
|
}
|
|
598
595
|
case 'extract_with_llm':
|
|
599
|
-
note = '
|
|
596
|
+
note = 'External LLM API call billed by your LLM provider, separate from the credit cost.';
|
|
600
597
|
break;
|
|
601
598
|
case 'scrape': {
|
|
602
|
-
// Free local tool; only the browser-backed screenshot format is metered
|
|
603
599
|
projected = base;
|
|
604
|
-
note =
|
|
605
|
-
? 'screenshot format requires a server browser (2 credits). Other formats are free.'
|
|
606
|
-
: 'Free local tool — no credits charged. json format may incur external LLM cost.';
|
|
600
|
+
note = 'Fixed cost per invocation. json format may incur external LLM cost (billed by your provider).';
|
|
607
601
|
break;
|
|
608
602
|
}
|
|
609
603
|
case 'agent': {
|
|
@@ -614,7 +608,7 @@ class AuthManager {
|
|
|
614
608
|
break;
|
|
615
609
|
}
|
|
616
610
|
default:
|
|
617
|
-
note =
|
|
611
|
+
note = 'Fixed cost per invocation.';
|
|
618
612
|
}
|
|
619
613
|
|
|
620
614
|
return { projected, note };
|
|
@@ -50,7 +50,16 @@ export class LLMsTxtAnalyzer {
|
|
|
50
50
|
apis: [],
|
|
51
51
|
contentTypes: {},
|
|
52
52
|
securityAreas: [],
|
|
53
|
-
|
|
53
|
+
// Conservative defaults so output never renders `undefined` when live
|
|
54
|
+
// rate-limit probing is skipped (analyzeRateLimiting only runs with
|
|
55
|
+
// probeRateLimit:true). Overwritten with measured values when probed.
|
|
56
|
+
rateLimit: {
|
|
57
|
+
recommendedDelay: 1000,
|
|
58
|
+
maxConcurrency: 5,
|
|
59
|
+
recommendedRPM: 30,
|
|
60
|
+
reasoning: 'Conservative defaults applied; live rate-limit probing was not performed (pass probeRateLimit:true to measure actual response times).',
|
|
61
|
+
averageResponseTime: null
|
|
62
|
+
},
|
|
54
63
|
guidelines: {},
|
|
55
64
|
metadata: {},
|
|
56
65
|
errors: []
|
|
@@ -32,6 +32,7 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
32
32
|
concurrency = 5,
|
|
33
33
|
enableSourceVerification = true,
|
|
34
34
|
enableConflictDetection = true,
|
|
35
|
+
credibilityThreshold = 0.3,
|
|
35
36
|
cacheEnabled = true,
|
|
36
37
|
cacheTTL = 1800000, // 30 minutes
|
|
37
38
|
researchApproach = 'broad',
|
|
@@ -61,6 +62,7 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
61
62
|
this.concurrency = Math.min(Math.max(1, concurrency), 20);
|
|
62
63
|
this.enableSourceVerification = enableSourceVerification;
|
|
63
64
|
this.enableConflictDetection = enableConflictDetection;
|
|
65
|
+
this.credibilityThreshold = Math.min(Math.max(0, credibilityThreshold), 1);
|
|
64
66
|
|
|
65
67
|
// Stealth fallback config + lazy state (browser launched only on first block)
|
|
66
68
|
this.enableStealthFallback = enableStealthFallback;
|
|
@@ -859,7 +861,7 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
859
861
|
}
|
|
860
862
|
|
|
861
863
|
// Only include sources that meet minimum credibility threshold
|
|
862
|
-
if (overallCredibility >=
|
|
864
|
+
if (overallCredibility >= this.credibilityThreshold) {
|
|
863
865
|
verifiedSources.push({
|
|
864
866
|
...source,
|
|
865
867
|
credibilityFactors,
|
|
@@ -1360,7 +1362,7 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
1360
1362
|
|
|
1361
1363
|
generateKeyFindings(claimGroups, sources) {
|
|
1362
1364
|
return claimGroups
|
|
1363
|
-
.filter(group => group.avgCredibility >=
|
|
1365
|
+
.filter(group => group.avgCredibility >= this.credibilityThreshold)
|
|
1364
1366
|
.sort((a, b) => b.consensusStrength - a.consensusStrength)
|
|
1365
1367
|
.slice(0, 10)
|
|
1366
1368
|
.map(group => ({
|
|
@@ -1373,7 +1375,7 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
1373
1375
|
|
|
1374
1376
|
compileSupportingEvidence(sources) {
|
|
1375
1377
|
return sources
|
|
1376
|
-
.filter(source => source.overallCredibility >=
|
|
1378
|
+
.filter(source => source.overallCredibility >= this.credibilityThreshold)
|
|
1377
1379
|
.map(source => ({
|
|
1378
1380
|
title: source.title,
|
|
1379
1381
|
url: source.link,
|
package/src/server/withAuth.js
CHANGED
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
* (OpenTelemetry spans + Prometheus counters) added in v3.2.0.
|
|
5
5
|
*
|
|
6
6
|
* Contract:
|
|
7
|
-
* - resolves toolCost once per call
|
|
8
|
-
*
|
|
7
|
+
* - resolves toolCost once per call; every tool is metered (no free tier),
|
|
8
|
+
* so a valid API key is required for every invocation
|
|
9
9
|
* - try/finally guarantees a single `tool invocation` log line per call
|
|
10
10
|
* - log payload: { toolName, paramHash, durationMs, outcome, creditCost, creatorMode }
|
|
11
11
|
* - outcome ∈ { 'success' | 'error' | 'insufficient_credits' }
|
|
@@ -36,16 +36,12 @@ export function makeWithAuth({ authManager, logger, metrics = null }) {
|
|
|
36
36
|
const startTime = Date.now();
|
|
37
37
|
const paramHash = hashParams(params);
|
|
38
38
|
const creatorMode = authManager.isCreatorMode();
|
|
39
|
-
// Params-aware: scrape's screenshot format is metered, other formats free
|
|
40
39
|
const creditCost = creatorMode ? 0 : authManager.getToolCost(toolName, params);
|
|
41
|
-
// Open-core Phase 2: Tier-0 tools (cost 0) run locally for free — no
|
|
42
|
-
// credit check, no usage report, and no API key required.
|
|
43
|
-
const freeTier = creditCost === 0;
|
|
44
40
|
let outcome = 'pending';
|
|
45
41
|
let thrown = null;
|
|
46
42
|
|
|
47
43
|
try {
|
|
48
|
-
if (!creatorMode
|
|
44
|
+
if (!creatorMode) {
|
|
49
45
|
const hasCredits = await authManager.checkCredits(creditCost);
|
|
50
46
|
if (!hasCredits) {
|
|
51
47
|
outcome = 'insufficient_credits';
|
|
@@ -90,7 +86,7 @@ export function makeWithAuth({ authManager, logger, metrics = null }) {
|
|
|
90
86
|
// Cost injection must never break the request path
|
|
91
87
|
}
|
|
92
88
|
|
|
93
|
-
if (!creatorMode
|
|
89
|
+
if (!creatorMode) {
|
|
94
90
|
await authManager.reportUsage(toolName, creditCost, params, 200, Date.now() - startTime);
|
|
95
91
|
}
|
|
96
92
|
|
|
@@ -98,7 +94,7 @@ export function makeWithAuth({ authManager, logger, metrics = null }) {
|
|
|
98
94
|
} catch (error) {
|
|
99
95
|
outcome = 'error';
|
|
100
96
|
thrown = error;
|
|
101
|
-
if (!creatorMode
|
|
97
|
+
if (!creatorMode) {
|
|
102
98
|
await authManager.reportUsage(
|
|
103
99
|
toolName,
|
|
104
100
|
Math.max(1, Math.floor(creditCost * 0.5)),
|
|
@@ -391,7 +391,9 @@ export class GenerateLLMsTxtTool {
|
|
|
391
391
|
lines.push('');
|
|
392
392
|
lines.push('### Technical Justification');
|
|
393
393
|
lines.push(`${analysis.rateLimit.reasoning}`);
|
|
394
|
-
|
|
394
|
+
if (analysis.rateLimit.averageResponseTime != null) {
|
|
395
|
+
lines.push(`Average response time: ${analysis.rateLimit.averageResponseTime}ms`);
|
|
396
|
+
}
|
|
395
397
|
lines.push('');
|
|
396
398
|
}
|
|
397
399
|
|
|
@@ -272,6 +272,10 @@ export class DeepResearchTool {
|
|
|
272
272
|
maxUrls: params.maxUrls,
|
|
273
273
|
timeLimit: params.timeLimit,
|
|
274
274
|
concurrency: params.concurrency,
|
|
275
|
+
// Minimum credibility a source must clear in verifySourceCredibility.
|
|
276
|
+
// Must be on the orchestrator *constructor* config (not the
|
|
277
|
+
// conductResearch options) — that is the only place it is now read.
|
|
278
|
+
credibilityThreshold: params.credibilityThreshold,
|
|
275
279
|
// The orchestrator tunes its query expansion to the approach (commercial
|
|
276
280
|
// vs academic vs current-events); without this it always used academic
|
|
277
281
|
// variations, which poisoned commercial/comparative searches.
|
|
@@ -356,7 +360,6 @@ export class DeepResearchTool {
|
|
|
356
360
|
buildResearchOptions(params) {
|
|
357
361
|
return {
|
|
358
362
|
sourceTypes: params.sourceTypes,
|
|
359
|
-
credibilityThreshold: params.credibilityThreshold,
|
|
360
363
|
includeRecentOnly: params.includeRecentOnly,
|
|
361
364
|
queryExpansion: params.queryExpansion,
|
|
362
365
|
enableConflictDetection: params.enableConflictDetection,
|
|
@@ -79,9 +79,10 @@ export class SearchWebTool {
|
|
|
79
79
|
// Check for Creator Mode - allows search without API key for development/testing
|
|
80
80
|
const isCreatorMode = isCreatorModeVerified();
|
|
81
81
|
|
|
82
|
-
//
|
|
83
|
-
//
|
|
84
|
-
// enforced
|
|
82
|
+
// The server can start without a key so the MCP client can list tools, so
|
|
83
|
+
// construction must not throw here. Every tool is metered and the key
|
|
84
|
+
// requirement is enforced before execute() runs (withAuth credit check)
|
|
85
|
+
// and again at execute() time below.
|
|
85
86
|
if (!apiKey && !isCreatorMode) {
|
|
86
87
|
this.searchAdapter = null;
|
|
87
88
|
this.isCreatorModeFallback = false;
|
|
@@ -127,7 +128,7 @@ export class SearchWebTool {
|
|
|
127
128
|
}
|
|
128
129
|
// --- end SearXNG short-circuit ---
|
|
129
130
|
|
|
130
|
-
//
|
|
131
|
+
// Search via the CrawlForge proxy needs an API key
|
|
131
132
|
if (!this.searchAdapter) {
|
|
132
133
|
throw new Error('CrawlForge API key is required for search functionality. Get one at https://www.crawlforge.dev/signup');
|
|
133
134
|
}
|