crawlforge-mcp-server 3.0.17 → 3.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +2 -0
- package/README.md +1 -0
- package/package.json +2 -1
- package/src/constants/config.js +2 -1
- package/src/core/AuthManager.js +112 -27
- package/src/core/ResearchOrchestrator.js +86 -5
- package/src/core/endpointGuard.js +37 -0
- package/src/tools/research/deepResearch.js +33 -8
package/CLAUDE.md
CHANGED
|
@@ -183,6 +183,8 @@ MAX_PAGES_PER_CRAWL=100
|
|
|
183
183
|
RESPECT_ROBOTS_TXT=true
|
|
184
184
|
```
|
|
185
185
|
|
|
186
|
+
`OPENAI_API_KEY` / `ANTHROPIC_API_KEY` are optional. They only affect `deep_research`: when set, it produces a fully synthesized report internally; when unset, it returns raw evidence for the calling LLM (e.g. Claude Code) to synthesize.
|
|
187
|
+
|
|
186
188
|
### Configuration Files
|
|
187
189
|
|
|
188
190
|
- `~/.crawlforge/config.json` - User authentication and API key storage
|
package/README.md
CHANGED
|
@@ -154,6 +154,7 @@ export CRAWLFORGE_API_KEY="cf_live_your_api_key_here"
|
|
|
154
154
|
|
|
155
155
|
# Optional: Custom API endpoint (for enterprise)
|
|
156
156
|
export CRAWLFORGE_API_URL="https://api.crawlforge.dev"
|
|
157
|
+
# As of v3.0.18, this variable is validated against an allow-list of CrawlForge backend hosts.
|
|
157
158
|
```
|
|
158
159
|
|
|
159
160
|
### Manual Configuration
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlforge-mcp-server",
|
|
3
|
-
"version": "3.0.17",
|
|
3
|
+
"version": "3.0.18",
|
|
4
4
|
"description": "CrawlForge MCP Server - Professional Model Context Protocol server with 20 comprehensive web scraping, crawling, and content processing tools.",
|
|
5
5
|
"main": "server.js",
|
|
6
6
|
"bin": {
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
"setup": "node setup.js",
|
|
14
14
|
"dev": "cross-env NODE_ENV=development node server.js",
|
|
15
15
|
"test": "node tests/integration/mcp-protocol-compliance.test.js",
|
|
16
|
+
"test:unit": "CRAWLFORGE_CREATOR_SECRET= node --test 'tests/unit/*.test.js'",
|
|
16
17
|
"test:tools": "node test-tools.js",
|
|
17
18
|
"test:real-world": "node test-real-world.js",
|
|
18
19
|
"test:all": "bash run-all-tests.sh",
|
package/src/constants/config.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import dotenv from 'dotenv';
|
|
2
2
|
import { fileURLToPath } from 'url';
|
|
3
3
|
import { dirname, join } from 'path';
|
|
4
|
+
import { resolveApiEndpoint } from '../core/endpointGuard.js';
|
|
4
5
|
|
|
5
6
|
// Load environment variables
|
|
6
7
|
const __filename = fileURLToPath(import.meta.url);
|
|
@@ -11,7 +12,7 @@ export const config = {
|
|
|
11
12
|
// CrawlForge API Configuration
|
|
12
13
|
crawlforge: {
|
|
13
14
|
apiKey: process.env.CRAWLFORGE_API_KEY || '',
|
|
14
|
-
apiBaseUrl: process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev'
|
|
15
|
+
apiBaseUrl: resolveApiEndpoint(process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev')
|
|
15
16
|
},
|
|
16
17
|
|
|
17
18
|
// Performance
|
package/src/core/AuthManager.js
CHANGED
|
@@ -7,15 +7,18 @@
|
|
|
7
7
|
import fs from 'fs/promises';
|
|
8
8
|
import path from 'path';
|
|
9
9
|
import { isCreatorModeVerified } from './creatorMode.js';
|
|
10
|
+
import { resolveApiEndpoint } from './endpointGuard.js';
|
|
10
11
|
|
|
11
12
|
class AuthManager {
|
|
12
13
|
constructor() {
|
|
13
|
-
this.apiEndpoint = process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev';
|
|
14
|
+
this.apiEndpoint = resolveApiEndpoint(process.env.CRAWLFORGE_API_URL || 'https://www.crawlforge.dev');
|
|
14
15
|
this.configPath = path.join(process.env.HOME || process.env.USERPROFILE, '.crawlforge', 'config.json');
|
|
16
|
+
this.pendingUsagePath = path.join(process.env.HOME || process.env.USERPROFILE, '.crawlforge', 'pending-usage.json');
|
|
15
17
|
this.config = null;
|
|
16
18
|
this.creditCache = new Map();
|
|
17
19
|
this.lastCreditCheck = null;
|
|
18
|
-
this.
|
|
20
|
+
this.lastSuccessfulCreditCheck = new Map();
|
|
21
|
+
this.CREDIT_CHECK_INTERVAL = 15000;
|
|
19
22
|
this.initialized = false;
|
|
20
23
|
// NOTE: Don't read creator mode in constructor - it's set dynamically in server.js
|
|
21
24
|
}
|
|
@@ -48,6 +51,12 @@ class AuthManager {
|
|
|
48
51
|
console.log('No existing CrawlForge configuration found. Run setup to configure.');
|
|
49
52
|
this.initialized = true;
|
|
50
53
|
}
|
|
54
|
+
|
|
55
|
+
try {
|
|
56
|
+
await this._flushPendingUsage();
|
|
57
|
+
} catch {
|
|
58
|
+
// Best-effort flush — do not block startup
|
|
59
|
+
}
|
|
51
60
|
}
|
|
52
61
|
|
|
53
62
|
/**
|
|
@@ -192,20 +201,16 @@ class AuthManager {
|
|
|
192
201
|
const data = await response.json();
|
|
193
202
|
this.creditCache.set(this.config.userId, data.creditsRemaining);
|
|
194
203
|
this.lastCreditCheck = now;
|
|
204
|
+
this.lastSuccessfulCreditCheck.set(this.config.userId, now);
|
|
195
205
|
return data.creditsRemaining >= estimatedCredits;
|
|
196
206
|
}
|
|
197
207
|
} catch (error) {
|
|
198
208
|
console.error('Failed to check credits:', error.message);
|
|
199
209
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
// failing closed when there's no cached data (no free usage bypass)
|
|
210
|
+
const lastOk = this.lastSuccessfulCreditCheck.get(this.config.userId) ?? 0;
|
|
211
|
+
const withinGrace = Date.now() - lastOk < 30_000;
|
|
203
212
|
const cached = this.creditCache.get(this.config.userId);
|
|
204
|
-
if (cached !== undefined && cached >= estimatedCredits)
|
|
205
|
-
console.warn('Using cached credits due to network error — will re-verify on next call');
|
|
206
|
-
return true;
|
|
207
|
-
}
|
|
208
|
-
|
|
213
|
+
if (withinGrace && cached !== undefined && cached >= estimatedCredits) return true;
|
|
209
214
|
throw new Error('Unable to verify credits. Please check your connection and try again.');
|
|
210
215
|
}
|
|
211
216
|
}
|
|
@@ -218,39 +223,119 @@ class AuthManager {
|
|
|
218
223
|
if (this.isCreatorMode()) {
|
|
219
224
|
return;
|
|
220
225
|
}
|
|
221
|
-
|
|
226
|
+
|
|
222
227
|
if (!this.config) {
|
|
223
228
|
return; // Silently skip if not configured
|
|
224
229
|
}
|
|
225
230
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
timestamp: new Date().toISOString(),
|
|
234
|
-
version: '3.0.3'
|
|
235
|
-
};
|
|
231
|
+
const userId = this.config.userId;
|
|
232
|
+
|
|
233
|
+
// Pre-decrement cache before fetch so network failures still deplete credits
|
|
234
|
+
const cached = this.creditCache.get(userId);
|
|
235
|
+
if (cached !== undefined) {
|
|
236
|
+
this.creditCache.set(userId, Math.max(0, cached - creditsUsed));
|
|
237
|
+
}
|
|
236
238
|
|
|
239
|
+
const payload = {
|
|
240
|
+
tool,
|
|
241
|
+
creditsUsed,
|
|
242
|
+
requestData,
|
|
243
|
+
responseStatus,
|
|
244
|
+
processingTime,
|
|
245
|
+
timestamp: new Date().toISOString(),
|
|
246
|
+
version: '3.0.3'
|
|
247
|
+
};
|
|
248
|
+
|
|
249
|
+
try {
|
|
237
250
|
await fetch(`${this.apiEndpoint}/api/v1/usage`, {
|
|
238
251
|
method: 'POST',
|
|
239
252
|
headers: {
|
|
240
253
|
'Content-Type': 'application/json',
|
|
241
254
|
'X-API-Key': this.config.apiKey
|
|
242
255
|
},
|
|
243
|
-
body: JSON.stringify(payload)
|
|
256
|
+
body: JSON.stringify(payload),
|
|
257
|
+
signal: AbortSignal.timeout(5000)
|
|
244
258
|
});
|
|
245
259
|
|
|
246
|
-
|
|
247
|
-
const cached = this.creditCache.get(this.config.userId);
|
|
248
|
-
if (cached !== undefined) {
|
|
249
|
-
this.creditCache.set(this.config.userId, Math.max(0, cached - creditsUsed));
|
|
250
|
-
}
|
|
260
|
+
await this._flushPendingUsage();
|
|
251
261
|
} catch (error) {
|
|
252
262
|
// Log but don't throw - usage reporting should not break tool execution
|
|
253
263
|
console.error('Failed to report usage:', error.message);
|
|
264
|
+
await this._appendPendingUsage({ toolName: tool, creditsUsed, userId, timestamp: payload.timestamp });
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
async _appendPendingUsage(entry) {
|
|
269
|
+
try {
|
|
270
|
+
const configDir = path.dirname(this.pendingUsagePath);
|
|
271
|
+
await fs.mkdir(configDir, { recursive: true });
|
|
272
|
+
|
|
273
|
+
let entries = [];
|
|
274
|
+
try {
|
|
275
|
+
const raw = await fs.readFile(this.pendingUsagePath, 'utf-8');
|
|
276
|
+
entries = JSON.parse(raw);
|
|
277
|
+
} catch {
|
|
278
|
+
// File absent or corrupt — start fresh
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
entries.push(entry);
|
|
282
|
+
|
|
283
|
+
// Cap at 1 MB — drop oldest entries until serialized size fits
|
|
284
|
+
let serialized = JSON.stringify(entries);
|
|
285
|
+
while (serialized.length > 1_048_576 && entries.length > 1) {
|
|
286
|
+
entries.shift();
|
|
287
|
+
serialized = JSON.stringify(entries);
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
await fs.writeFile(this.pendingUsagePath, serialized, { mode: 0o600 });
|
|
291
|
+
} catch (error) {
|
|
292
|
+
console.error('Failed to append pending usage:', error.message);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
async _flushPendingUsage() {
|
|
297
|
+
if (!this.config) return;
|
|
298
|
+
|
|
299
|
+
let entries;
|
|
300
|
+
try {
|
|
301
|
+
const raw = await fs.readFile(this.pendingUsagePath, 'utf-8');
|
|
302
|
+
entries = JSON.parse(raw);
|
|
303
|
+
} catch {
|
|
304
|
+
return; // Nothing to flush
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
if (!Array.isArray(entries) || entries.length === 0) return;
|
|
308
|
+
|
|
309
|
+
const remaining = [];
|
|
310
|
+
for (const entry of entries) {
|
|
311
|
+
try {
|
|
312
|
+
await fetch(`${this.apiEndpoint}/api/v1/usage`, {
|
|
313
|
+
method: 'POST',
|
|
314
|
+
headers: {
|
|
315
|
+
'Content-Type': 'application/json',
|
|
316
|
+
'X-API-Key': this.config.apiKey
|
|
317
|
+
},
|
|
318
|
+
body: JSON.stringify({
|
|
319
|
+
tool: entry.toolName,
|
|
320
|
+
creditsUsed: entry.creditsUsed,
|
|
321
|
+
timestamp: entry.timestamp,
|
|
322
|
+
version: '3.0.3'
|
|
323
|
+
}),
|
|
324
|
+
signal: AbortSignal.timeout(5000)
|
|
325
|
+
});
|
|
326
|
+
} catch {
|
|
327
|
+
remaining.push(entry);
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
try {
|
|
332
|
+
if (remaining.length === 0) {
|
|
333
|
+
await fs.unlink(this.pendingUsagePath);
|
|
334
|
+
} else {
|
|
335
|
+
await fs.writeFile(this.pendingUsagePath, JSON.stringify(remaining), { mode: 0o600 });
|
|
336
|
+
}
|
|
337
|
+
} catch (error) {
|
|
338
|
+
console.error('Failed to update pending usage file:', error.message);
|
|
254
339
|
}
|
|
255
340
|
}
|
|
256
341
|
|
|
@@ -120,30 +120,35 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
120
120
|
|
|
121
121
|
// Stage 1: Initial topic exploration and query expansion
|
|
122
122
|
const expandedQueries = await this.expandResearchTopic(topic);
|
|
123
|
+
this.researchState.currentDepth = 1;
|
|
123
124
|
this.logActivity('topic_expansion', { originalTopic: topic, expandedQueries });
|
|
124
125
|
|
|
125
126
|
// Stage 2: Broad information gathering
|
|
126
127
|
const initialSources = await this.gatherInitialSources(expandedQueries, options);
|
|
128
|
+
this.researchState.currentDepth = 2;
|
|
127
129
|
this.logActivity('initial_gathering', { sourcesFound: initialSources.length });
|
|
128
130
|
|
|
129
131
|
// Stage 3: Deep exploration of promising sources
|
|
130
132
|
const detailedFindings = await this.exploreSourcesInDepth(initialSources, options);
|
|
133
|
+
this.researchState.currentDepth = 3;
|
|
131
134
|
this.logActivity('deep_exploration', { findingsCount: detailedFindings.length });
|
|
132
135
|
|
|
133
136
|
// Stage 4: Source credibility assessment
|
|
134
|
-
const verifiedSources = this.enableSourceVerification ?
|
|
137
|
+
const verifiedSources = this.enableSourceVerification ?
|
|
135
138
|
await this.verifySourceCredibility(detailedFindings) : detailedFindings;
|
|
139
|
+
this.researchState.currentDepth = 4;
|
|
136
140
|
this.logActivity('source_verification', { verifiedCount: verifiedSources.length });
|
|
137
141
|
|
|
138
142
|
// Stage 5: Information synthesis and conflict detection
|
|
139
143
|
const synthesizedResults = await this.synthesizeInformation(verifiedSources, topic);
|
|
144
|
+
this.researchState.currentDepth = 5;
|
|
140
145
|
this.logActivity('information_synthesis', { conflictsFound: synthesizedResults.conflicts.length });
|
|
141
146
|
|
|
142
|
-
// Stage 6: Final result compilation
|
|
143
|
-
const finalResults = this.compileResearchResults(topic, synthesizedResults, options);
|
|
144
|
-
|
|
145
147
|
const totalTime = Date.now() - startTime;
|
|
146
148
|
this.metrics.totalProcessingTime = totalTime;
|
|
149
|
+
|
|
150
|
+
// Stage 6: Final result compilation
|
|
151
|
+
const finalResults = this.compileResearchResults(topic, synthesizedResults, options);
|
|
147
152
|
|
|
148
153
|
this.logger.info('Research completed', {
|
|
149
154
|
sessionId,
|
|
@@ -636,10 +641,22 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
636
641
|
consensus: [],
|
|
637
642
|
gaps: [],
|
|
638
643
|
recommendations: [],
|
|
639
|
-
llmSynthesis: null
|
|
644
|
+
llmSynthesis: null,
|
|
645
|
+
rawEvidence: null,
|
|
646
|
+
synthesisMode: this.enableLLMFeatures ? 'llm' : 'raw_evidence'
|
|
640
647
|
};
|
|
641
648
|
|
|
642
649
|
try {
|
|
650
|
+
// Without an LLM the keyword/frequency-based synthesis produces
|
|
651
|
+
// unreadable output. Skip it and return raw evidence for the calling
|
|
652
|
+
// LLM (e.g. Claude Code) to synthesize.
|
|
653
|
+
if (!this.enableLLMFeatures) {
|
|
654
|
+
synthesis.rawEvidence = this.buildRawEvidence(sources);
|
|
655
|
+
synthesis.supportingEvidence = this.compileSupportingEvidence(sources);
|
|
656
|
+
this.metrics.synthesisTime += Date.now() - startTime;
|
|
657
|
+
return synthesis;
|
|
658
|
+
}
|
|
659
|
+
|
|
643
660
|
// Extract key claims and facts from each source
|
|
644
661
|
const extractedClaims = await this.extractKeyClaims(sources);
|
|
645
662
|
|
|
@@ -1110,6 +1127,36 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
1110
1127
|
.slice(0, 15);
|
|
1111
1128
|
}
|
|
1112
1129
|
|
|
1130
|
+
buildRawEvidence(sources) {
|
|
1131
|
+
return sources
|
|
1132
|
+
.filter(s => s.extractedContent && s.extractedContent.length > 0)
|
|
1133
|
+
.map(s => ({
|
|
1134
|
+
title: s.title,
|
|
1135
|
+
url: s.link,
|
|
1136
|
+
credibility: s.overallCredibility ?? 0.5,
|
|
1137
|
+
contentSnippet: s.extractedContent.substring(0, 4000),
|
|
1138
|
+
topSentences: this.extractTopSentences(s.extractedContent, 5)
|
|
1139
|
+
}))
|
|
1140
|
+
.slice(0, 20);
|
|
1141
|
+
}
|
|
1142
|
+
|
|
1143
|
+
extractTopSentences(text, n = 5) {
|
|
1144
|
+
if (!text) return [];
|
|
1145
|
+
const sentences = text
|
|
1146
|
+
.split(/(?<=[.!?])\s+/)
|
|
1147
|
+
.map(s => s.trim())
|
|
1148
|
+
.filter(s => s.length >= 40 && s.length <= 500);
|
|
1149
|
+
|
|
1150
|
+
return sentences
|
|
1151
|
+
.map(s => ({
|
|
1152
|
+
text: s,
|
|
1153
|
+
score: s.length * 0.5 + (s.match(/[A-Z][a-z]+/g)?.length || 0) * 5
|
|
1154
|
+
}))
|
|
1155
|
+
.sort((a, b) => b.score - a.score)
|
|
1156
|
+
.slice(0, n)
|
|
1157
|
+
.map(item => item.text);
|
|
1158
|
+
}
|
|
1159
|
+
|
|
1113
1160
|
identifyResearchGaps(claimGroups, topic) {
|
|
1114
1161
|
const gaps = [];
|
|
1115
1162
|
|
|
@@ -1158,6 +1205,40 @@ export class ResearchOrchestrator extends EventEmitter {
|
|
|
1158
1205
|
}
|
|
1159
1206
|
|
|
1160
1207
|
compileResearchResults(topic, synthesis, options) {
|
|
1208
|
+
if (synthesis.synthesisMode === 'raw_evidence') {
|
|
1209
|
+
const sources = synthesis.rawEvidence || [];
|
|
1210
|
+
return {
|
|
1211
|
+
sessionId: this.researchState.sessionId,
|
|
1212
|
+
topic,
|
|
1213
|
+
synthesisMode: 'raw_evidence',
|
|
1214
|
+
note: "This response contains raw research evidence with no AI synthesis. The calling LLM (you) should synthesize these sources to answer the user's question. To enable internal LLM synthesis instead, set OPENAI_API_KEY or ANTHROPIC_API_KEY in the MCP server environment.",
|
|
1215
|
+
sources,
|
|
1216
|
+
findings: [],
|
|
1217
|
+
researchSummary: {
|
|
1218
|
+
totalSources: this.metrics.urlsProcessed,
|
|
1219
|
+
verifiedSources: this.metrics.sourcesVerified,
|
|
1220
|
+
sourcesReturned: sources.length,
|
|
1221
|
+
llmEnhanced: false
|
|
1222
|
+
},
|
|
1223
|
+
activityLog: this.researchState.activityLog,
|
|
1224
|
+
performance: {
|
|
1225
|
+
...this.metrics,
|
|
1226
|
+
timeLimit: this.timeLimit,
|
|
1227
|
+
completedWithinLimit: this.metrics.totalProcessingTime < this.timeLimit
|
|
1228
|
+
},
|
|
1229
|
+
metadata: {
|
|
1230
|
+
generatedAt: new Date().toISOString(),
|
|
1231
|
+
researchDepth: this.researchState.currentDepth,
|
|
1232
|
+
configuration: {
|
|
1233
|
+
maxDepth: this.maxDepth,
|
|
1234
|
+
maxUrls: this.maxUrls,
|
|
1235
|
+
timeLimit: this.timeLimit,
|
|
1236
|
+
llmEnabled: false
|
|
1237
|
+
}
|
|
1238
|
+
}
|
|
1239
|
+
};
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1161
1242
|
const baseResults = {
|
|
1162
1243
|
sessionId: this.researchState.sessionId,
|
|
1163
1244
|
topic,
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { isCreatorModeVerified } from './creatorMode.js';
|
|
2
|
+
|
|
3
|
+
/** Hostnames accepted as CrawlForge backend API endpoints (HTTPS only). */
export const ALLOWED_HOSTS = ['www.crawlforge.dev', 'crawlforge.dev', 'api.crawlforge.dev'];

// Loopback hostnames; accepted by resolveApiEndpoint only when creator mode is verified.
const LOCALHOST_HOSTS = new Set(['localhost', '127.0.0.1', '::1']);

/**
 * Validate an API endpoint URL against the allow-list and normalize it into
 * a base URL that is safe to prefix to a path such as `/api/v1/usage`.
 *
 * Loopback hosts are accepted (with any protocol) only when
 * `isCreatorModeVerified()` returns true. Every other host must use `https:`
 * and appear in `ALLOWED_HOSTS`.
 *
 * The result is `protocol//host` plus the path with trailing slashes removed.
 * Query string, fragment and credentials are not part of a base URL and are
 * discarded.
 *
 * @param {string} rawUrl - Endpoint URL, e.g. from `CRAWLFORGE_API_URL`.
 * @returns {string} Normalized base URL without a trailing slash.
 * @throws {Error} If `rawUrl` cannot be parsed or is not permitted.
 */
export function resolveApiEndpoint(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    throw new Error(`Invalid API endpoint URL: "${rawUrl}"`);
  }

  // URL.hostname keeps the square brackets around IPv6 literals ("[::1]"),
  // so strip them before comparing against the host lists.
  const hostname = parsed.hostname.replace(/^\[|\]$/g, '');
  const refusal = `Refusing to use API endpoint "${rawUrl}" — not in allow-list`;

  if (LOCALHOST_HOSTS.has(hostname)) {
    if (!isCreatorModeVerified()) {
      throw new Error(refusal);
    }
  } else if (parsed.protocol !== 'https:' || !ALLOWED_HOSTS.includes(hostname)) {
    throw new Error(refusal);
  }

  // Assigning an empty pathname on an http(s) URL is normalized back to "/",
  // so the trailing slash has to be removed from the string being built
  // rather than from the URL object.
  const basePath = parsed.pathname.replace(/\/+$/, '');
  return `${parsed.protocol}//${parsed.host}${basePath}`;
}
|
|
@@ -208,11 +208,20 @@ export class DeepResearchTool {
|
|
|
208
208
|
baseConfig.llmConfig = params.llmConfig;
|
|
209
209
|
}
|
|
210
210
|
|
|
211
|
-
//
|
|
211
|
+
// Every approach must propagate the user's scope params (maxUrls,
|
|
212
|
+
// timeLimit, concurrency) — only `broad` did before, so non-broad
|
|
213
|
+
// approaches silently fell back to orchestrator defaults.
|
|
214
|
+
const scopeConfig = {
|
|
215
|
+
maxUrls: params.maxUrls,
|
|
216
|
+
timeLimit: params.timeLimit,
|
|
217
|
+
concurrency: params.concurrency
|
|
218
|
+
};
|
|
219
|
+
|
|
212
220
|
switch (params.researchApproach) {
|
|
213
221
|
case 'academic':
|
|
214
222
|
return {
|
|
215
223
|
...baseConfig,
|
|
224
|
+
...scopeConfig,
|
|
216
225
|
maxDepth: Math.min(params.maxDepth, 8),
|
|
217
226
|
enableSourceVerification: true,
|
|
218
227
|
searchConfig: {
|
|
@@ -225,10 +234,11 @@ export class DeepResearchTool {
|
|
|
225
234
|
}
|
|
226
235
|
}
|
|
227
236
|
};
|
|
228
|
-
|
|
237
|
+
|
|
229
238
|
case 'current_events':
|
|
230
239
|
return {
|
|
231
240
|
...baseConfig,
|
|
241
|
+
...scopeConfig,
|
|
232
242
|
maxDepth: Math.min(params.maxDepth, 6),
|
|
233
243
|
searchConfig: {
|
|
234
244
|
enableRanking: true,
|
|
@@ -240,18 +250,20 @@ export class DeepResearchTool {
|
|
|
240
250
|
}
|
|
241
251
|
}
|
|
242
252
|
};
|
|
243
|
-
|
|
253
|
+
|
|
244
254
|
case 'focused':
|
|
245
255
|
return {
|
|
246
256
|
...baseConfig,
|
|
257
|
+
...scopeConfig,
|
|
247
258
|
maxDepth: Math.min(params.maxDepth, 4),
|
|
248
259
|
maxUrls: Math.min(params.maxUrls, 30),
|
|
249
260
|
concurrency: Math.min(params.concurrency, 3)
|
|
250
261
|
};
|
|
251
|
-
|
|
262
|
+
|
|
252
263
|
case 'comparative':
|
|
253
264
|
return {
|
|
254
265
|
...baseConfig,
|
|
266
|
+
...scopeConfig,
|
|
255
267
|
enableConflictDetection: true,
|
|
256
268
|
maxDepth: params.maxDepth,
|
|
257
269
|
searchConfig: {
|
|
@@ -263,14 +275,13 @@ export class DeepResearchTool {
|
|
|
263
275
|
}
|
|
264
276
|
}
|
|
265
277
|
};
|
|
266
|
-
|
|
278
|
+
|
|
267
279
|
case 'broad':
|
|
268
280
|
default:
|
|
269
281
|
return {
|
|
270
282
|
...baseConfig,
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
timeLimit: params.timeLimit
|
|
283
|
+
...scopeConfig,
|
|
284
|
+
maxDepth: params.maxDepth
|
|
274
285
|
};
|
|
275
286
|
}
|
|
276
287
|
}
|
|
@@ -334,6 +345,20 @@ export class DeepResearchTool {
|
|
|
334
345
|
* Format research results according to output preferences
|
|
335
346
|
*/
|
|
336
347
|
formatResults(results, params) {
|
|
348
|
+
// Raw evidence mode (no LLM configured): pass through the clean shape
|
|
349
|
+
// designed for the calling LLM to synthesize.
|
|
350
|
+
if (results.synthesisMode === 'raw_evidence') {
|
|
351
|
+
return {
|
|
352
|
+
synthesisMode: 'raw_evidence',
|
|
353
|
+
note: results.note,
|
|
354
|
+
sources: results.sources,
|
|
355
|
+
researchSummary: results.researchSummary,
|
|
356
|
+
metadata: results.metadata,
|
|
357
|
+
performance: results.performance,
|
|
358
|
+
activityLog: params.includeActivityLog ? results.activityLog : undefined
|
|
359
|
+
};
|
|
360
|
+
}
|
|
361
|
+
|
|
337
362
|
const formatted = {
|
|
338
363
|
researchSummary: results.researchSummary,
|
|
339
364
|
metadata: results.metadata
|