crawlforge-mcp-server 4.6.4 → 4.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -229,6 +229,11 @@ export OLLAMA_DEFAULT_MODEL="llama3.2" # default; any locally-pulled
229
229
  # Optional: Cloud LLM keys — only needed when you pass provider: "openai" or "anthropic"
230
230
  export OPENAI_API_KEY="sk-..."
231
231
  export ANTHROPIC_API_KEY="sk-ant-..."
232
+
233
+ # Optional: deep_research stealth extraction fallback (v4.6.6) — see below
234
+ export RESEARCH_STEALTH_ENGINE="auto" # auto (default) | camoufox | chromium
235
+ export RESEARCH_STEALTH_FALLBACK="true" # set to "false" to disable entirely
236
+ export RESEARCH_MAX_STEALTH_RETRIES="8" # cap on stealth retries per research run
232
237
  ```
233
238
 
234
239
  ### Local-LLM quickstart (`extract_with_llm` with Ollama)
@@ -247,6 +252,31 @@ ollama pull llama3.2
247
252
  # extract_with_llm({ url: "https://example.com", prompt: "…", model: "llama3.2" })
248
253
  ```
249
254
 
255
+ ### Stealth extraction for `deep_research` (Camoufox)
256
+
257
+ `deep_research` automatically retries sources that block the normal fetch path — Reddit, Quora, forums, and Cloudflare/DataDome-protected pages, which typically return HTTP 403 — through a **real fingerprinted browser**, then re-extracts from the rendered HTML. The fallback is bounded (at most `RESEARCH_MAX_STEALTH_RETRIES` stealth retries per research run, default 8, plus a per-page timeout) and lazy — the browser stack only loads when a source is actually blocked.
258
+
259
+ Engine selection (`RESEARCH_STEALTH_ENGINE`):
260
+
261
+ - **`auto`** (default) — prefer **Camoufox** (Firefox anti-detect), fall back to Chromium stealth, then plain fetch.
262
+ - **`camoufox`** — force Camoufox; there is no Chromium fallback, so stealth retries fail if Camoufox or its binary is unavailable.
263
+ - **`chromium`** — force the Chromium stealth engine.
264
+
265
+ Headless Chromium **cannot** clear modern challenges (Cloudflare Turnstile, DataDome) — **Camoufox can**. In testing it recovered Quora and Trustpilot pages that were otherwise fully blocked. To enable it, make sure the optional `camoufox` dependency is installed and run its one-time binary fetch:
266
+
267
+ ```bash
268
+ # Camoufox is declared as an optional dependency, so a normal install already pulls it.
269
+ # If you installed with --no-optional, add it explicitly:
270
+ npm install camoufox
271
+
272
+ # One-time download of the Camoufox Firefox binary (~130 MB):
273
+ npx camoufox fetch
274
+ ```
275
+
276
+ Without the Camoufox binary, `deep_research` in the default `auto` mode falls back to Chromium stealth and then to plain fetch — the run does not fail (only a warning is logged), you just get lower recovery on heavily-protected sites. Disable the whole fallback with `RESEARCH_STEALTH_FALLBACK=false`.
277
+
278
+ > **Note:** Hard IP-reputation blocks (e.g. Reddit's edge `403`) cannot be cleared by headless stealth alone, whichever engine is used; they require residential/mobile proxies, which CrawlForge does not provide. See [docs/stealth-engines.md](docs/stealth-engines.md) for details.
279
+
250
280
  ### Manual Configuration
251
281
 
252
282
  Your configuration is stored at `~/.crawlforge/config.json`:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlforge-mcp-server",
3
- "version": "4.6.4",
3
+ "version": "4.6.6",
4
4
  "mcpName": "io.github.mysleekdesigns/crawlforge-mcp-server",
5
5
  "description": "CrawlForge MCP Server - Professional Model Context Protocol server with 26 web scraping, crawling, deep-research, and autonomous-extraction tools. Returns clean Markdown and structured JSON for Claude, Cursor, and any MCP client. Defaults to local Ollama for LLM extraction (no API key needed); OpenAI/Anthropic available as opt-in. Includes a unified multi-format scrape tool, an autonomous agent, pre-built site templates, and Camoufox stealth browsing.",
6
6
  "main": "server.js",
@@ -18,7 +18,7 @@
18
18
  "test": "node tests/integration/mcp-protocol-compliance.test.js",
19
19
  "test:unit": "CRAWLFORGE_CREATOR_SECRET= node --test 'tests/unit/*.test.js'",
20
20
  "test:integration": "CRAWLFORGE_CREATOR_SECRET= node --test 'tests/integration/tools/*.test.js'",
21
- "test:coverage": "CRAWLFORGE_CREATOR_SECRET= c8 --reporter=text --reporter=lcov --include='src/**/*.js' --exclude='src/**/_*.js' --lines=60 --statements=60 --functions=55 --branches=45 node --test 'tests/unit/*.test.js' 'tests/integration/tools/*.test.js'",
21
+ "test:coverage": "CRAWLFORGE_CREATOR_SECRET= c8 --reporter=text --reporter=lcov --include='src/**/*.js' --exclude='src/**/_*.js' --lines=60 --statements=60 --functions=55 --branches=45 node --test --test-force-exit 'tests/unit/*.test.js' 'tests/integration/tools/*.test.js'",
22
22
  "test:tools": "node test-tools.js",
23
23
  "test:real-world": "node test-real-world.js",
24
24
  "test:all": "bash run-all-tests.sh",
@@ -131,6 +131,9 @@
131
131
  "winston": "^3.11.0",
132
132
  "zod": "^3.23.8"
133
133
  },
134
+ "optionalDependencies": {
135
+ "camoufox": "^0.1.19"
136
+ },
134
137
  "devDependencies": {
135
138
  "@jest/globals": "^30.3.0",
136
139
  "c8": "^11.0.0",
package/server.js CHANGED
@@ -90,7 +90,7 @@ if (configErrors.length > 0 && config.server.nodeEnv === 'production') {
90
90
  // Create the server
91
91
  const server = new McpServer({
92
92
  name: "crawlforge",
93
- version: "4.6.4",
93
+ version: "4.6.6",
94
94
  description: "Production-ready MCP server with 26 web scraping, crawling, and content processing tools. Features MCP Resources (crawlforge://), Prompts, Sampling fallback, Elicitation, stealth browsing, deep research, structured extraction, change tracking, local-LLM extraction via Ollama, unified multi-format scrape, and autonomous agent tool.",
95
95
  homepage: "https://www.crawlforge.dev",
96
96
  icon: "https://www.crawlforge.dev/icon.png"
@@ -34,12 +34,27 @@ export class ResearchOrchestrator extends EventEmitter {
34
34
  enableConflictDetection = true,
35
35
  cacheEnabled = true,
36
36
  cacheTTL = 1800000, // 30 minutes
37
+ researchApproach = 'broad',
38
+ // Stealth-browser fallback for sources that block the plain fetch/extract
39
+ // path (Reddit, Quora, forums → HTTP 403). On by default; bounded so it
40
+ // cannot blow the research time budget. Disable with
41
+ // RESEARCH_STEALTH_FALLBACK=false.
42
+ enableStealthFallback = process.env.RESEARCH_STEALTH_FALLBACK !== 'false',
43
+ maxStealthRetries = parseInt(process.env.RESEARCH_MAX_STEALTH_RETRIES || '8', 10),
44
+ // 'auto' (default) prefers Camoufox (Firefox anti-detect — beats
45
+ // Cloudflare/DataDome that headless Chromium can't) and falls back to
46
+ // Chromium stealth when Camoufox/its binary is unavailable. Force one
47
+ // with RESEARCH_STEALTH_ENGINE=camoufox|chromium.
48
+ stealthEngine = process.env.RESEARCH_STEALTH_ENGINE || 'auto',
49
+ stealthLevel = 'medium',
50
+ stealthTimeoutMs = 20000,
37
51
  searchConfig = {},
38
52
  crawlConfig = {},
39
53
  extractConfig = {},
40
54
  summarizeConfig = {}
41
55
  } = options;
42
56
 
57
+ this.researchApproach = researchApproach;
43
58
  this.maxDepth = Math.min(Math.max(1, maxDepth), 10);
44
59
  this.maxUrls = Math.min(Math.max(1, maxUrls), 1000);
45
60
  this.timeLimit = Math.min(Math.max(30000, timeLimit), 300000);
@@ -47,6 +62,18 @@ export class ResearchOrchestrator extends EventEmitter {
47
62
  this.enableSourceVerification = enableSourceVerification;
48
63
  this.enableConflictDetection = enableConflictDetection;
49
64
 
65
+ // Stealth fallback config + lazy state (browser launched only on first block)
66
+ this.enableStealthFallback = enableStealthFallback;
67
+ this.maxStealthRetries = Math.max(0, maxStealthRetries);
68
+ this.stealthEngine = stealthEngine;
69
+ this.stealthLevel = stealthLevel;
70
+ this.stealthTimeoutMs = stealthTimeoutMs;
71
+ this._stealthManager = null; // Chromium StealthBrowserManager (fallback engine)
72
+ this._stealthBrowser = null; // Camoufox browser handle (preferred engine)
73
+ this._stealthEngineActive = null;
74
+ this._stealthInit = null;
75
+ this._stealthCount = 0;
76
+
50
77
  // Initialize tools
51
78
  this.searchTool = new SearchWebTool(searchConfig);
52
79
  this.crawlTool = new CrawlDeepTool(crawlConfig);
@@ -99,7 +126,9 @@ export class ResearchOrchestrator extends EventEmitter {
99
126
  llmAnalysisCalls: 0,
100
127
  semanticAnalysisTime: 0,
101
128
  queryExpansionTime: 0,
102
- synthesisTime: 0
129
+ synthesisTime: 0,
130
+ stealthRetries: 0,
131
+ stealthRecovered: 0
103
132
  };
104
133
  }
105
134
 
@@ -201,6 +230,9 @@ export class ResearchOrchestrator extends EventEmitter {
201
230
  Object.keys(this.metrics).forEach(key => {
202
231
  this.metrics[key] = 0;
203
232
  });
233
+
234
+ // Reset per-run stealth-retry budget
235
+ this._stealthCount = 0;
204
236
  }
205
237
 
206
238
  /**
@@ -269,32 +301,50 @@ export class ResearchOrchestrator extends EventEmitter {
269
301
  }
270
302
 
271
303
  /**
272
- * Generate research-specific query variations
304
+ * Generate research-specific query variations, tuned to the research approach.
305
+ *
306
+ * Academic/scientific suffixes ("peer reviewed", "research paper", "what is")
307
+ * only help when the caller actually asked for an academic search. Appending
308
+ * them to commercial or comparative topics dragged web search toward
309
+ * irrelevant government/academic PDFs and long-tail noise — the cause of
310
+ * near-empty research runs on niche commercial topics.
273
311
  */
274
312
  generateResearchVariations(topic) {
275
- const variations = [];
276
-
277
- // Question-based variations
278
- variations.push(`what is ${topic}`);
279
- variations.push(`how does ${topic} work`);
280
- variations.push(`${topic} explained`);
281
- variations.push(`${topic} research`);
282
- variations.push(`${topic} studies`);
283
- variations.push(`${topic} analysis`);
284
-
285
- // Academic and authoritative variations
286
- variations.push(`${topic} academic`);
287
- variations.push(`${topic} scientific`);
288
- variations.push(`${topic} research paper`);
289
- variations.push(`${topic} peer reviewed`);
290
-
291
- // Current and historical context
292
- variations.push(`latest ${topic}`);
293
- variations.push(`current ${topic}`);
294
- variations.push(`${topic} 2024`);
295
- variations.push(`${topic} trends`);
296
-
297
- return variations.slice(0, 10); // Limit variations
313
+ const approach = this.researchApproach || 'broad';
314
+
315
+ if (approach === 'academic') {
316
+ return [
317
+ `${topic} research`,
318
+ `${topic} study`,
319
+ `${topic} analysis`,
320
+ `${topic} academic`,
321
+ `${topic} scientific`,
322
+ `${topic} research paper`,
323
+ `${topic} peer reviewed`,
324
+ `${topic} explained`
325
+ ];
326
+ }
327
+
328
+ if (approach === 'current_events') {
329
+ return [
330
+ `latest ${topic}`,
331
+ `${topic} news`,
332
+ `recent ${topic}`,
333
+ `${topic} update`,
334
+ `${topic} announcement`
335
+ ];
336
+ }
337
+
338
+ // broad / focused / comparative — commercial & general intent
339
+ return [
340
+ `${topic} review`,
341
+ `${topic} reviews`,
342
+ `${topic} comparison`,
343
+ `${topic} vs alternatives`,
344
+ `${topic} pricing`,
345
+ `best ${topic}`,
346
+ `${topic} company`
347
+ ];
298
348
  }
299
349
 
300
350
  /**
@@ -531,11 +581,38 @@ export class ResearchOrchestrator extends EventEmitter {
531
581
  }
532
582
 
533
583
  // Normalize content to string (extract_content returns {text: "..."}, fallback returns string)
534
- const contentText = contentData && contentData.content
535
- ? (typeof contentData.content === 'string'
536
- ? contentData.content
537
- : (contentData.content.text || ''))
584
+ const normalizeContent = (cd) => cd && cd.content
585
+ ? (typeof cd.content === 'string' ? cd.content : (cd.content.text || ''))
538
586
  : '';
587
+ let contentText = normalizeContent(contentData);
588
+
589
+ // Stealth fallback: high-value discussion sources (Reddit, Quora,
590
+ // forums) return HTTP 403 to the plain fetch/extract path. When the
591
+ // normal path produced no usable content, retry through a real
592
+ // fingerprinted browser and re-run extraction on the rendered HTML.
593
+ // Bounded by maxStealthRetries + a per-page timeout.
594
+ const blocked = !contentData || contentData.success === false || contentText.trim().length === 0;
595
+ if (blocked && this.enableStealthFallback && this._stealthCount < this.maxStealthRetries) {
596
+ this._stealthCount++;
597
+ this.metrics.stealthRetries++;
598
+ try {
599
+ const stealthHtml = await this._stealthFetchHtml(source.link);
600
+ if (stealthHtml) {
601
+ contentData = await this.extractTool.execute({
602
+ url: source.link,
603
+ html: stealthHtml,
604
+ options: { includeMetadata: true, includeStructuredData: true }
605
+ });
606
+ contentText = normalizeContent(contentData);
607
+ if (contentData && contentData.success !== false && contentText.trim().length > 0) {
608
+ this.metrics.stealthRecovered++;
609
+ this.logActivity('stealth_recovery', { url: source.link });
610
+ }
611
+ }
612
+ } catch (stealthError) {
613
+ this.logger.warn('Stealth fallback failed', { url: source.link, error: stealthError.message });
614
+ }
615
+ }
539
616
 
540
617
  // Only count and enhance sources that actually produced non-empty content.
541
618
  // Skip failed extractions and empty {text:""} results.
@@ -621,10 +698,134 @@ export class ResearchOrchestrator extends EventEmitter {
621
698
  }
622
699
  });
623
700
 
701
+ // Tear down the stealth browser as soon as the extraction stage is done —
702
+ // it is only needed here and would otherwise leak a Playwright handle.
703
+ await this._closeStealth();
704
+
624
705
  // Sort by relevance score (LLM or traditional)
625
706
  return detailedFindings.sort((a, b) => (b.relevanceScore || 0) - (a.relevanceScore || 0));
626
707
  }
627
708
 
709
+ /**
710
+ * Lazily launch the stealth browser once. The heavy browser stack is only
711
+ * loaded when a source actually blocks the plain path. Engine selection:
712
+ * - 'camoufox'/'auto' → Camoufox (Firefox anti-detect). Loaded via the CJS
713
+ * build (its ESM bundle has a broken dynamic-require). Beats Cloudflare/
714
+ * DataDome challenges that patched headless Chromium can't pass.
715
+ * - 'chromium', or any Camoufox failure under 'auto' → StealthBrowserManager.
716
+ */
717
+ async _getStealthBrowser() {
718
+ if (!this._stealthInit) {
719
+ this._stealthInit = (async () => {
720
+ if (this.stealthEngine === 'camoufox' || this.stealthEngine === 'auto') {
721
+ try {
722
+ const { createRequire } = await import('module');
723
+ const require = createRequire(import.meta.url);
724
+ const camoufox = require('camoufox'); // CJS build — ESM build is broken
725
+ await this._ensureCamoufoxLayout(camoufox);
726
+ this._stealthBrowser = await camoufox.Camoufox({ headless: true });
727
+ this._stealthEngineActive = 'camoufox';
728
+ this.logger.info('Stealth fallback using Camoufox (Firefox) engine');
729
+ return;
730
+ } catch (e) {
731
+ if (this.stealthEngine === 'camoufox') throw e; // explicit request → surface
732
+ this.logger.warn('Camoufox unavailable, falling back to Chromium stealth', { error: e.message });
733
+ }
734
+ }
735
+ const { StealthBrowserManager } = await import('./StealthBrowserManager.js');
736
+ this._stealthManager = new StealthBrowserManager();
737
+ await this._stealthManager.launchStealthBrowser({ level: this.stealthLevel });
738
+ this._stealthEngineActive = 'chromium';
739
+ })();
740
+ }
741
+ await this._stealthInit;
742
+ }
743
+
744
+ /**
745
+ * macOS packaging fix for camoufox-js: it expects properties.json in
746
+ * Camoufox.app/Contents/MacOS/, but the .app bundle ships it under
747
+ * Contents/Resources/. Bridge it so the launcher can boot. Best-effort.
748
+ */
749
+ async _ensureCamoufoxLayout(camoufox) {
750
+ if (process.platform !== 'darwin' || !camoufox?.INSTALL_DIR) return;
751
+ try {
752
+ const fs = await import('fs');
753
+ const path = await import('path');
754
+ const appDir = path.join(camoufox.INSTALL_DIR, 'Camoufox.app', 'Contents');
755
+ const target = path.join(appDir, 'MacOS', 'properties.json');
756
+ const source = path.join(appDir, 'Resources', 'properties.json');
757
+ if (!fs.existsSync(target) && fs.existsSync(source)) {
758
+ fs.copyFileSync(source, target);
759
+ }
760
+ } catch { /* best-effort; launch surfaces a real error if it matters */ }
761
+ }
762
+
763
+ /**
764
+ * Fetch a URL's fully-rendered HTML through the stealth browser. Returns the
765
+ * HTML string, or null if every attempt was blocked / empty.
766
+ *
767
+ * Cloudflare/DataDome challenges are probabilistic — the same URL may serve a
768
+ * challenge on one load and the real page on the next — so Camoufox retries a
769
+ * few times with a fresh page each attempt. Chromium can't clear these at all
770
+ * (proven), so it gets a single attempt to avoid burning the time budget.
771
+ */
772
+ async _stealthFetchHtml(url) {
773
+ await this._getStealthBrowser();
774
+ const attempts = this._stealthEngineActive === 'camoufox' ? 3 : 1;
775
+ for (let i = 0; i < attempts; i++) {
776
+ const html = await this._stealthFetchOnce(url);
777
+ if (html) return html;
778
+ }
779
+ return null;
780
+ }
781
+
782
+ /** One stealth navigation. Fresh page/context; judges blocked by rendered content. */
783
+ async _stealthFetchOnce(url) {
784
+ let page;
785
+ if (this._stealthEngineActive === 'camoufox') {
786
+ page = await this._stealthBrowser.newPage();
787
+ } else {
788
+ const { contextId } = await this._stealthManager.createStealthContext({ level: this.stealthLevel });
789
+ page = await this._stealthManager.createStealthPage(contextId);
790
+ }
791
+ try {
792
+ const resp = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: this.stealthTimeoutMs });
793
+ // Do NOT bail on the initial HTTP status: anti-bot challenges (Cloudflare
794
+ // Turnstile) return 403 on the first response and only resolve to the
795
+ // real page after their JS runs. Let it settle, then judge by the
796
+ // *rendered* content instead.
797
+ await page.waitForLoadState('networkidle', { timeout: 8000 }).catch(() => {});
798
+ await page.waitForTimeout(2500).catch(() => {});
799
+ const html = await page.content();
800
+ const title = (await page.title().catch(() => '')) || '';
801
+ const bodyLen = await page.evaluate(() => document.body?.innerText?.trim().length || 0).catch(() => 0);
802
+
803
+ // Still a challenge/block page → treat as blocked.
804
+ const challengeTitle = /just a moment|checking your browser|attention required|verify you are human|access denied|^blocked$/i.test(title);
805
+ const status = resp ? resp.status() : 0;
806
+ if (challengeTitle) return null;
807
+ if (status >= 400 && bodyLen < 500) return null; // hard block (e.g. Reddit 403 shell)
808
+ if (bodyLen < 200) return null; // empty / interstitial
809
+ return html && html.length > 200 ? html : null;
810
+ } finally {
811
+ await page.close().catch(() => {});
812
+ }
813
+ }
814
+
815
+ /** Close the stealth browser and reset its lazy state (idempotent). */
816
+ async _closeStealth() {
817
+ try {
818
+ if (this._stealthBrowser) await this._stealthBrowser.close().catch(() => {});
819
+ if (this._stealthManager) await this._stealthManager.cleanup().catch(() => {});
820
+ } catch (e) {
821
+ this.logger.warn('Stealth browser cleanup failed', { error: e.message });
822
+ }
823
+ this._stealthBrowser = null;
824
+ this._stealthManager = null;
825
+ this._stealthEngineActive = null;
826
+ this._stealthInit = null;
827
+ }
828
+
628
829
  /**
629
830
  * Verify source credibility using multiple factors
630
831
  */
@@ -644,8 +845,19 @@ export class ResearchOrchestrator extends EventEmitter {
644
845
  citationPotential: this.assessCitationPotential(source)
645
846
  };
646
847
 
647
- const overallCredibility = this.calculateOverallCredibility(credibilityFactors);
648
-
848
+ let overallCredibility = this.calculateOverallCredibility(credibilityFactors);
849
+
850
+ // Down-weight topically-irrelevant sources so high-authority but
851
+ // off-topic pages (e.g. a .gov PDF unrelated to the query) don't
852
+ // dominate the results. relevanceScore is keyword-based here (no LLM):
853
+ // ~1 when the topic appears in the content, ~0 when it doesn't.
854
+ const relevance = typeof source.relevanceScore === 'number'
855
+ ? source.relevanceScore
856
+ : null;
857
+ if (relevance !== null) {
858
+ overallCredibility *= (0.4 + 0.6 * relevance);
859
+ }
860
+
649
861
  // Only include sources that meet minimum credibility threshold
650
862
  if (overallCredibility >= 0.3) {
651
863
  verifiedSources.push({
@@ -1453,7 +1665,10 @@ export class ResearchOrchestrator extends EventEmitter {
1453
1665
  try {
1454
1666
  // Stop any active research
1455
1667
  this.stopResearch();
1456
-
1668
+
1669
+ // Tear down the stealth browser if one was launched
1670
+ await this._closeStealth();
1671
+
1457
1672
  // Clear cache if available
1458
1673
  if (this.cache && typeof this.cache.clear === "function") {
1459
1674
  await this.cache.clear();
@@ -1491,9 +1706,11 @@ export class ResearchOrchestrator extends EventEmitter {
1491
1706
  llmAnalysisCalls: 0,
1492
1707
  semanticAnalysisTime: 0,
1493
1708
  queryExpansionTime: 0,
1494
- synthesisTime: 0
1709
+ synthesisTime: 0,
1710
+ stealthRetries: 0,
1711
+ stealthRecovered: 0
1495
1712
  };
1496
-
1713
+
1497
1714
  } catch (error) {
1498
1715
  // Silent cleanup - do not throw errors during cleanup
1499
1716
  console.warn("Warning during ResearchOrchestrator cleanup:", error.message);
@@ -11,6 +11,11 @@ import { htmlToMarkdown } from '../../utils/htmlToMarkdown.js'; // D3.1
11
11
 
12
12
  const ExtractContentSchema = z.object({
13
13
  url: z.string().url(),
14
+ // Pre-rendered HTML to process directly instead of fetching `url` (e.g. a
15
+ // post-action page from scrape_with_actions, or a stealth-browser render in
16
+ // deep_research). Without this field Zod stripped it and the tool always
17
+ // re-fetched the URL — silently defeating any pre-fetched-HTML caller.
18
+ html: z.string().optional(),
14
19
  options: z.object({
15
20
  // Content extraction options
16
21
  useReadability: z.boolean().default(true),
@@ -271,7 +271,11 @@ export class DeepResearchTool {
271
271
  const scopeConfig = {
272
272
  maxUrls: params.maxUrls,
273
273
  timeLimit: params.timeLimit,
274
- concurrency: params.concurrency
274
+ concurrency: params.concurrency,
275
+ // The orchestrator tunes its query expansion to the approach (commercial
276
+ // vs academic vs current-events); without this it always used academic
277
+ // variations, which poisoned commercial/comparative searches.
278
+ researchApproach: params.researchApproach
275
279
  };
276
280
 
277
281
  switch (params.researchApproach) {