mcp-researchpowerpack 6.0.15 → 6.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/mcp-use.json +2 -2
- package/dist/src/config/index.js +10 -1
- package/dist/src/config/index.js.map +2 -2
- package/dist/src/schemas/start-research.js +1 -1
- package/dist/src/schemas/start-research.js.map +1 -1
- package/dist/src/schemas/web-search.js +22 -3
- package/dist/src/schemas/web-search.js.map +2 -2
- package/dist/src/services/llm-processor.js +6 -1
- package/dist/src/services/llm-processor.js.map +2 -2
- package/dist/src/tools/scrape.js +7 -16
- package/dist/src/tools/scrape.js.map +2 -2
- package/dist/src/tools/search.js +6 -14
- package/dist/src/tools/search.js.map +2 -2
- package/dist/src/tools/start-research.js +4 -1
- package/dist/src/tools/start-research.js.map +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
@@ -11,7 +11,7 @@ Built on [mcp-use](https://github.com/nicepkg/mcp-use). No stdio, HTTP only.
 | tool | what it does | needs |
 |------|-------------|-------|
 | `start-research` | returns a goal-tailored brief: `primary_branch` (reddit / web / both), exact `first_call_sequence`, 25–50 keyword seeds, iteration hints, gaps to watch, stop criteria. Call FIRST every session. | `LLM_API_KEY` + `LLM_BASE_URL` + `LLM_MODEL` for non-degraded brief generation (optional) |
-| `web-search` | parallel Google search, up to 50 queries per call, parallel-callable across turns. `scope: "web" \| "reddit" \| "both"` — reddit mode filters to post permalinks. Returns tiered markdown (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis + gaps + refine suggestions. | `SERPER_API_KEY` |
+| `web-search` | parallel Google search, up to 50 queries per call, parallel-callable across turns. `scope: "web" \| "reddit" \| "both"` — reddit mode filters to post permalinks. Queries should be retrieval probes, not topic labels: rewrite vague phrases into source-aware searches with anchors such as `site:`, exact quoted terms, versions, error text, package names, or community filters. Returns tiered markdown (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis + gaps + refine suggestions. | `SERPER_API_KEY` |
 | `scrape-links` | fetch URLs in parallel with per-URL LLM extraction. Auto-detects `reddit.com/r/.../comments/` permalinks and routes them through the Reddit API (threaded post + comments); PDF / DOCX / PPTX / XLSX URLs route through Jina Reader; non-reddit, non-document web URLs flow through Scrape.do. Parallel-callable. | `SCRAPEDO_API_KEY` for web URLs (+ `REDDIT_CLIENT_ID` / `REDDIT_CLIENT_SECRET` for reddit URLs; optional `JINA_API_KEY` for higher document limits) |
 
 Also exposes `/health` and `health://status`.
@@ -20,6 +20,8 @@ Also exposes `/health` and `health://status`.
 
 Call `start-research` once at the beginning of each session with your goal. The server returns a brief that tells the agent exactly which tool to call first (reddit-first for sentiment/migration, web-first for spec/bug/pricing, both when opinion-heavy AND needs official sources), what keyword seeds to fire, and when to stop.
 
+For search fan-out, use bad → better rewrite thinking before calling `web-search`: turn broad phrases like `<feature> support`, `<product> pricing`, `<library> bug fix`, or `<tool> reviews` into source-aware probes such as `site:<official-docs-domain> "<feature>" "<platform-or-version>"`, `site:<vendor-domain> "<product>" pricing "enterprise" OR "free tier"`, `"<exact error text>" "<library-or-package>" "<version>" site:github.com`, or `site:reddit.com/r/<community>/comments "<tool>" "migration" OR "regression"`.
+
 Pair the server with the [`run-research`](https://github.com/yigitkonur/skills-by-yigitkonur/tree/main/skills/run-research) skill for the full agentic playbook:
 
 ```bash
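To make the table and the fan-out guidance above concrete, here is a minimal session sketch in TypeScript. It assumes the MCP TypeScript SDK client and a placeholder endpoint URL; the goal, subreddit, and query strings are invented, and only parameters documented in this diff (`goal`, `queries`, `extract`, `scope`) are used:

```ts
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";

async function main() {
  const client = new Client({ name: "example-agent", version: "0.0.0" });
  // Placeholder URL: the server is HTTP-only per the README.
  await client.connect(
    new StreamableHTTPClientTransport(new URL("http://localhost:3000/mcp")),
  );

  // 1. Goal-tailored brief: call FIRST every session.
  const brief = await client.callTool({
    name: "start-research",
    arguments: {
      goal:
        "Does the MCP TypeScript SDK support OAuth 2.1? " +
        "Done = an official doc page or a merged PR, plus one working example.",
    },
  });

  // 2. Probe queries, not topic labels: one orthogonal facet per query.
  const search = await client.callTool({
    name: "web-search",
    arguments: {
      queries: [
        'site:github.com "modelcontextprotocol" "OAuth 2.1"',
        'site:reddit.com/r/mcp/comments "OAuth" "authorization"', // subreddit invented
      ],
      extract:
        "OAuth 2.1 support in TypeScript MCP frameworks — runnable code, not marketing",
      scope: "both",
    },
  });

  console.log(brief, search);
}

main();
```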
package/dist/mcp-use.json
CHANGED
package/dist/src/config/index.js
CHANGED
@@ -87,7 +87,9 @@ const CTR_WEIGHTS = {
   9: 13.33,
   10: 12.56
 };
+let cachedLlmConfigStatus = null;
 function getLLMConfigStatus() {
+  if (cachedLlmConfigStatus) return cachedLlmConfigStatus;
   const apiKeyPresent = !!process.env.LLM_API_KEY?.trim();
   const baseUrlPresent = !!process.env.LLM_BASE_URL?.trim();
   const modelPresent = !!process.env.LLM_MODEL?.trim();
@@ -96,7 +98,7 @@ function getLLMConfigStatus() {
   if (!baseUrlPresent) missingVars.push("LLM_BASE_URL");
   if (!modelPresent) missingVars.push("LLM_MODEL");
   const configured = missingVars.length === 0;
-  return {
+  cachedLlmConfigStatus = {
     configured,
     apiKeyPresent,
     baseUrlPresent,
@@ -104,6 +106,12 @@ function getLLMConfigStatus() {
     missingVars,
     error: configured ? null : `LLM disabled: missing ${missingVars.join(", ")}`
   };
+  return cachedLlmConfigStatus;
+}
+function _resetLLMConfigStatusForTests() {
+  cachedLlmConfigStatus = null;
+  cachedLlmExtraction = null;
+  cachedEnv = null;
 }
 let cachedLlmExtraction = null;
 function getLlmExtraction() {
@@ -142,6 +150,7 @@ export {
   REDDIT,
   SCRAPER,
   SERVER,
+  _resetLLMConfigStatusForTests,
   getCapabilities,
   getLLMConfigStatus,
   getMissingEnvMessage,
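A sketch of why this diff resets all three caches and exports `_resetLLMConfigStatusForTests`: both `getLLMConfigStatus()` and `parseEnv()` now memoize env-derived state, so a test that mutates `process.env` would otherwise assert against stale flags. The test runner (vitest) and the import path are assumptions:

```ts
import { beforeEach, expect, test } from "vitest";
import {
  _resetLLMConfigStatusForTests,
  getLLMConfigStatus,
} from "mcp-researchpowerpack/dist/src/config/index.js"; // import path assumed

beforeEach(() => {
  // Drop every env-derived cache so each test reads fresh values.
  _resetLLMConfigStatusForTests();
  delete process.env.LLM_API_KEY;
  delete process.env.LLM_BASE_URL;
  delete process.env.LLM_MODEL;
});

test("reports missing vars, then picks up newly set env after a reset", () => {
  expect(getLLMConfigStatus().configured).toBe(false);

  process.env.LLM_API_KEY = "k";
  process.env.LLM_BASE_URL = "https://example.invalid/v1";
  process.env.LLM_MODEL = "m";

  // Without this reset, the cached status from the first call would win.
  _resetLLMConfigStatusForTests();
  expect(getLLMConfigStatus().configured).toBe(true);
});
```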
package/dist/src/config/index.js.map
CHANGED
@@ -1,7 +1,7 @@
{
"version": 3,
"sources": ["../../../src/config/index.ts"],
-
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n\n const parsed = parseInt(value, 10);\n\n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n\n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n\n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n\n return parsed;\n}\n\n\n// ============================================================================\n// Environment Parsing\n// ============================================================================\n\ninterface EnvConfig {\n SCRAPER_API_KEY: string;\n SEARCH_API_KEY: string | undefined;\n REDDIT_CLIENT_ID: string | undefined;\n REDDIT_CLIENT_SECRET: string | undefined;\n JINA_API_KEY: string | undefined;\n}\n\nlet cachedEnv: EnvConfig | null = null;\n\nexport function parseEnv(): EnvConfig {\n if (cachedEnv) return cachedEnv;\n cachedEnv = {\n SCRAPER_API_KEY: process.env.SCRAPEDO_API_KEY || '',\n SEARCH_API_KEY: process.env.SERPER_API_KEY || undefined,\n REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID || undefined,\n REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET || undefined,\n JINA_API_KEY: process.env.JINA_API_KEY || undefined,\n };\n return cachedEnv;\n}\n\n// ============================================================================\n// MCP Server Configuration\n// ============================================================================\n\nexport const SERVER = {\n NAME: PACKAGE_NAME,\n VERSION: VERSION,\n DESCRIPTION: PACKAGE_DESCRIPTION,\n} as const;\n\n// ============================================================================\n// Capability Detection (which features are available based on ENV)\n// ============================================================================\n\nexport interface Capabilities {\n reddit: boolean; // REDDIT_CLIENT_ID + REDDIT_CLIENT_SECRET\n search: boolean; // SERPER_API_KEY\n scraping: boolean; // SCRAPEDO_API_KEY\n llmExtraction: boolean; // LLM_API_KEY + LLM_BASE_URL + LLM_MODEL\n}\n\nexport function getCapabilities(): Capabilities {\n const env = parseEnv();\n return {\n reddit: !!(env.REDDIT_CLIENT_ID && env.REDDIT_CLIENT_SECRET),\n search: !!env.SEARCH_API_KEY,\n scraping: !!env.SCRAPER_API_KEY,\n llmExtraction: getLLMConfigStatus().configured,\n };\n}\n\nexport function getMissingEnvMessage(capability: keyof Capabilities): string {\n const messages: Record<keyof Capabilities, string> = {\n reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\\n\\n\uD83D\uDC49 Create a Reddit app at: https://www.reddit.com/prefs/apps (select \"script\" type)',\n search: '\u274C **Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` (including 
`scope: \"reddit\"`).\\n\\n\uD83D\uDC49 Get your free API key at: https://serper.dev (2,500 free queries)',\n scraping: '\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\\n\\n\uD83D\uDC49 Sign up at: https://scrape.do (1,000 free credits)',\n llmExtraction: '\u26A0\uFE0F **AI extraction disabled.** Set `LLM_API_KEY`, `LLM_BASE_URL`, and `LLM_MODEL` to enable AI-powered content extraction and search classification.\\n\\nScraping will work but without intelligent content filtering.',\n };\n return messages[capability];\n}\n\n// ============================================================================\n// Concurrency Limits\n// ============================================================================\n\nexport const CONCURRENCY = {\n SEARCH: safeParseInt(process.env.CONCURRENCY_SEARCH, 50, 1, 200),\n SCRAPER: safeParseInt(process.env.CONCURRENCY_SCRAPER, 50, 1, 200),\n REDDIT: safeParseInt(process.env.CONCURRENCY_REDDIT, 50, 1, 200),\n LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 50, 1, 200),\n} as const;\n\nexport const SCRAPER = {\n BATCH_SIZE: 30,\n EXTRACTION_PREFIX: 'Extract from document only \u2014 never hallucinate or add external knowledge.',\n EXTRACTION_SUFFIX: 'First line = content, not preamble. No confirmation messages.',\n} as const;\n\n// ============================================================================\n// Reddit Configuration\n// ============================================================================\n\nexport const REDDIT = {\n BATCH_SIZE: 10,\n MAX_WORDS_PER_POST: 50_000,\n MAX_WORDS_TOTAL: 500_000,\n MIN_POSTS: 1,\n MAX_POSTS: 50,\n RETRY_COUNT: 5,\n RETRY_DELAYS: [2000, 4000, 8000, 16000, 32000] as const,\n} as const;\n\n// ============================================================================\n// CTR Weights for URL Ranking (inspired from CTR research)\n// ============================================================================\n\nexport const CTR_WEIGHTS: Record<number, number> = {\n 1: 100.00,\n 2: 60.00,\n 3: 48.89,\n 4: 33.33,\n 5: 28.89,\n 6: 26.44,\n 7: 24.44,\n 8: 17.78,\n 9: 13.33,\n 10: 12.56,\n} as const;\n\n// ============================================================================\n// LLM Configuration\n//\n// Required vars (all must be set together when LLM is enabled):\n// LLM_API_KEY \u2014 API key for the OpenAI-compatible endpoint\n// LLM_BASE_URL \u2014 endpoint base URL (e.g. https://server.up.railway.app/v1)\n// LLM_MODEL \u2014 primary model (e.g. gpt-5.4-mini)\n//\n// Optional:\n// LLM_FALLBACK_MODEL \u2014 model to use after primary exhausts all retries (e.g. 
gpt-5.4)\n// LLM_CONCURRENCY \u2014 parallel LLM calls (default: 50)\n//\n// Reasoning effort is always 'low' \u2014 not configurable.\n// ============================================================================\n\ninterface LlmExtractionConfig {\n readonly MODEL: string;\n readonly FALLBACK_MODEL: string;\n readonly BASE_URL: string;\n readonly API_KEY: string;\n}\n\nexport type LLMRequiredEnvVar = 'LLM_API_KEY' | 'LLM_BASE_URL' | 'LLM_MODEL';\n\nexport interface LLMConfigStatus {\n readonly configured: boolean;\n readonly apiKeyPresent: boolean;\n readonly baseUrlPresent: boolean;\n readonly modelPresent: boolean;\n readonly missingVars: readonly LLMRequiredEnvVar[];\n readonly error: string | null;\n}\n\nexport function getLLMConfigStatus(): LLMConfigStatus {\n const apiKeyPresent = !!process.env.LLM_API_KEY?.trim();\n const baseUrlPresent = !!process.env.LLM_BASE_URL?.trim();\n const modelPresent = !!process.env.LLM_MODEL?.trim();\n const missingVars: LLMRequiredEnvVar[] = [];\n\n if (!apiKeyPresent) missingVars.push('LLM_API_KEY');\n if (!baseUrlPresent) missingVars.push('LLM_BASE_URL');\n if (!modelPresent) missingVars.push('LLM_MODEL');\n\n const configured = missingVars.length === 0;\n
-
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAS3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAeA,IAAI,YAA8B;AAE3B,SAAS,WAAsB;AACpC,MAAI,UAAW,QAAO;AACtB,cAAY;AAAA,IACV,iBAAiB,QAAQ,IAAI,oBAAoB;AAAA,IACjD,gBAAgB,QAAQ,IAAI,kBAAkB;AAAA,IAC9C,kBAAkB,QAAQ,IAAI,oBAAoB;AAAA,IAClD,sBAAsB,QAAQ,IAAI,wBAAwB;AAAA,IAC1D,cAAc,QAAQ,IAAI,gBAAgB;AAAA,EAC5C;AACA,SAAO;AACT;AAMO,MAAM,SAAS;AAAA,EACpB,MAAM;AAAA,EACN;AAAA,EACA,aAAa;AACf;AAaO,SAAS,kBAAgC;AAC9C,QAAM,MAAM,SAAS;AACrB,SAAO;AAAA,IACL,QAAQ,CAAC,EAAE,IAAI,oBAAoB,IAAI;AAAA,IACvC,QAAQ,CAAC,CAAC,IAAI;AAAA,IACd,UAAU,CAAC,CAAC,IAAI;AAAA,IAChB,eAAe,mBAAmB,EAAE;AAAA,EACtC;AACF;AAEO,SAAS,qBAAqB,YAAwC;AAC3E,QAAM,WAA+C;AAAA,IACnD,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,eAAe;AAAA,EACjB;AACA,SAAO,SAAS,UAAU;AAC5B;AAMO,MAAM,cAAc;AAAA,EACzB,QAAQ,aAAa,QAAQ,IAAI,oBAAoB,IAAI,GAAG,GAAG;AAAA,EAC/D,SAAS,aAAa,QAAQ,IAAI,qBAAqB,IAAI,GAAG,GAAG;AAAA,EACjE,QAAQ,aAAa,QAAQ,IAAI,oBAAoB,IAAI,GAAG,GAAG;AAAA,EAC/D,gBAAgB,aAAa,QAAQ,IAAI,iBAAiB,IAAI,GAAG,GAAG;AACtE;AAEO,MAAM,UAAU;AAAA,EACrB,YAAY;AAAA,EACZ,mBAAmB;AAAA,EACnB,mBAAmB;AACrB;AAMO,MAAM,SAAS;AAAA,EACpB,YAAY;AAAA,EACZ,oBAAoB;AAAA,EACpB,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,WAAW;AAAA,EACX,aAAa;AAAA,EACb,cAAc,CAAC,KAAM,KAAM,KAAM,MAAO,IAAK;AAC/C;AAMO,MAAM,cAAsC;AAAA,EACjD,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,IAAI;AACN;
+
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n\n const parsed = parseInt(value, 10);\n\n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n\n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n\n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n\n return parsed;\n}\n\n\n// ============================================================================\n// Environment Parsing\n// ============================================================================\n\ninterface EnvConfig {\n SCRAPER_API_KEY: string;\n SEARCH_API_KEY: string | undefined;\n REDDIT_CLIENT_ID: string | undefined;\n REDDIT_CLIENT_SECRET: string | undefined;\n JINA_API_KEY: string | undefined;\n}\n\nlet cachedEnv: EnvConfig | null = null;\n\nexport function parseEnv(): EnvConfig {\n if (cachedEnv) return cachedEnv;\n cachedEnv = {\n SCRAPER_API_KEY: process.env.SCRAPEDO_API_KEY || '',\n SEARCH_API_KEY: process.env.SERPER_API_KEY || undefined,\n REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID || undefined,\n REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET || undefined,\n JINA_API_KEY: process.env.JINA_API_KEY || undefined,\n };\n return cachedEnv;\n}\n\n// ============================================================================\n// MCP Server Configuration\n// ============================================================================\n\nexport const SERVER = {\n NAME: PACKAGE_NAME,\n VERSION: VERSION,\n DESCRIPTION: PACKAGE_DESCRIPTION,\n} as const;\n\n// ============================================================================\n// Capability Detection (which features are available based on ENV)\n// ============================================================================\n\nexport interface Capabilities {\n reddit: boolean; // REDDIT_CLIENT_ID + REDDIT_CLIENT_SECRET\n search: boolean; // SERPER_API_KEY\n scraping: boolean; // SCRAPEDO_API_KEY\n llmExtraction: boolean; // LLM_API_KEY + LLM_BASE_URL + LLM_MODEL\n}\n\nexport function getCapabilities(): Capabilities {\n const env = parseEnv();\n return {\n reddit: !!(env.REDDIT_CLIENT_ID && env.REDDIT_CLIENT_SECRET),\n search: !!env.SEARCH_API_KEY,\n scraping: !!env.SCRAPER_API_KEY,\n llmExtraction: getLLMConfigStatus().configured,\n };\n}\n\nexport function getMissingEnvMessage(capability: keyof Capabilities): string {\n const messages: Record<keyof Capabilities, string> = {\n reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\\n\\n\uD83D\uDC49 Create a Reddit app at: https://www.reddit.com/prefs/apps (select \"script\" type)',\n search: '\u274C **Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` (including 
`scope: \"reddit\"`).\\n\\n\uD83D\uDC49 Get your free API key at: https://serper.dev (2,500 free queries)',\n scraping: '\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\\n\\n\uD83D\uDC49 Sign up at: https://scrape.do (1,000 free credits)',\n llmExtraction: '\u26A0\uFE0F **AI extraction disabled.** Set `LLM_API_KEY`, `LLM_BASE_URL`, and `LLM_MODEL` to enable AI-powered content extraction and search classification.\\n\\nScraping will work but without intelligent content filtering.',\n };\n return messages[capability];\n}\n\n// ============================================================================\n// Concurrency Limits\n// ============================================================================\n\nexport const CONCURRENCY = {\n SEARCH: safeParseInt(process.env.CONCURRENCY_SEARCH, 50, 1, 200),\n SCRAPER: safeParseInt(process.env.CONCURRENCY_SCRAPER, 50, 1, 200),\n REDDIT: safeParseInt(process.env.CONCURRENCY_REDDIT, 50, 1, 200),\n LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 50, 1, 200),\n} as const;\n\nexport const SCRAPER = {\n BATCH_SIZE: 30,\n EXTRACTION_PREFIX: 'Extract from document only \u2014 never hallucinate or add external knowledge.',\n EXTRACTION_SUFFIX: 'First line = content, not preamble. No confirmation messages.',\n} as const;\n\n// ============================================================================\n// Reddit Configuration\n// ============================================================================\n\nexport const REDDIT = {\n BATCH_SIZE: 10,\n MAX_WORDS_PER_POST: 50_000,\n MAX_WORDS_TOTAL: 500_000,\n MIN_POSTS: 1,\n MAX_POSTS: 50,\n RETRY_COUNT: 5,\n RETRY_DELAYS: [2000, 4000, 8000, 16000, 32000] as const,\n} as const;\n\n// ============================================================================\n// CTR Weights for URL Ranking (inspired from CTR research)\n// ============================================================================\n\nexport const CTR_WEIGHTS: Record<number, number> = {\n 1: 100.00,\n 2: 60.00,\n 3: 48.89,\n 4: 33.33,\n 5: 28.89,\n 6: 26.44,\n 7: 24.44,\n 8: 17.78,\n 9: 13.33,\n 10: 12.56,\n} as const;\n\n// ============================================================================\n// LLM Configuration\n//\n// Required vars (all must be set together when LLM is enabled):\n// LLM_API_KEY \u2014 API key for the OpenAI-compatible endpoint\n// LLM_BASE_URL \u2014 endpoint base URL (e.g. https://server.up.railway.app/v1)\n// LLM_MODEL \u2014 primary model (e.g. gpt-5.4-mini)\n//\n// Optional:\n// LLM_FALLBACK_MODEL \u2014 model to use after primary exhausts all retries (e.g. 
gpt-5.4)\n// LLM_CONCURRENCY \u2014 parallel LLM calls (default: 50)\n//\n// Reasoning effort is always 'low' \u2014 not configurable.\n// ============================================================================\n\ninterface LlmExtractionConfig {\n readonly MODEL: string;\n readonly FALLBACK_MODEL: string;\n readonly BASE_URL: string;\n readonly API_KEY: string;\n}\n\nexport type LLMRequiredEnvVar = 'LLM_API_KEY' | 'LLM_BASE_URL' | 'LLM_MODEL';\n\nexport interface LLMConfigStatus {\n readonly configured: boolean;\n readonly apiKeyPresent: boolean;\n readonly baseUrlPresent: boolean;\n readonly modelPresent: boolean;\n readonly missingVars: readonly LLMRequiredEnvVar[];\n readonly error: string | null;\n}\n\nlet cachedLlmConfigStatus: LLMConfigStatus | null = null;\n\nexport function getLLMConfigStatus(): LLMConfigStatus {\n if (cachedLlmConfigStatus) return cachedLlmConfigStatus;\n\n const apiKeyPresent = !!process.env.LLM_API_KEY?.trim();\n const baseUrlPresent = !!process.env.LLM_BASE_URL?.trim();\n const modelPresent = !!process.env.LLM_MODEL?.trim();\n const missingVars: LLMRequiredEnvVar[] = [];\n\n if (!apiKeyPresent) missingVars.push('LLM_API_KEY');\n if (!baseUrlPresent) missingVars.push('LLM_BASE_URL');\n if (!modelPresent) missingVars.push('LLM_MODEL');\n\n const configured = missingVars.length === 0;\n cachedLlmConfigStatus = {\n configured,\n apiKeyPresent,\n baseUrlPresent,\n modelPresent,\n missingVars,\n error: configured\n ? null\n : `LLM disabled: missing ${missingVars.join(', ')}`,\n };\n return cachedLlmConfigStatus;\n}\n\n/**\n * Test-only \u2014 drop every env-derived cache so a test can mutate process.env\n * and re-read fresh values. Covers the LLM config caches AND the parseEnv()\n * cache (which holds SCRAPEDO_API_KEY, SERPER_API_KEY, REDDIT_CLIENT_*,\n * JINA_API_KEY). Tests that scrub non-LLM env vars must also see a clean\n * env on the next parseEnv() call, otherwise getCapabilities() returns\n * stale flags.\n */\nexport function _resetLLMConfigStatusForTests(): void {\n cachedLlmConfigStatus = null;\n cachedLlmExtraction = null;\n cachedEnv = null;\n}\n\nlet cachedLlmExtraction: LlmExtractionConfig | null = null;\n\nfunction getLlmExtraction(): LlmExtractionConfig {\n if (cachedLlmExtraction) return cachedLlmExtraction;\n\n const apiKey = process.env.LLM_API_KEY?.trim() || '';\n const baseUrl = process.env.LLM_BASE_URL?.trim();\n const model = process.env.LLM_MODEL?.trim();\n const fallbackModel = process.env.LLM_FALLBACK_MODEL?.trim() || '';\n\n if (apiKey && !baseUrl) {\n throw new Error(\n 'LLM_BASE_URL is required when LLM_API_KEY is set. ' +\n 'Set LLM_BASE_URL to your OpenAI-compatible endpoint.',\n );\n }\n if (apiKey && !model) {\n throw new Error(\n 'LLM_MODEL is required when LLM_API_KEY is set.',\n );\n }\n\n cachedLlmExtraction = {\n API_KEY: apiKey,\n BASE_URL: baseUrl || '',\n MODEL: model || '',\n FALLBACK_MODEL: fallbackModel,\n };\n return cachedLlmExtraction;\n}\n\nexport const LLM_EXTRACTION: LlmExtractionConfig = new Proxy({} as LlmExtractionConfig, {\n get(_target, prop: string) {\n return getLlmExtraction()[prop as keyof LlmExtractionConfig];\n },\n});\n"],
+
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAS3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAeA,IAAI,YAA8B;AAE3B,SAAS,WAAsB;AACpC,MAAI,UAAW,QAAO;AACtB,cAAY;AAAA,IACV,iBAAiB,QAAQ,IAAI,oBAAoB;AAAA,IACjD,gBAAgB,QAAQ,IAAI,kBAAkB;AAAA,IAC9C,kBAAkB,QAAQ,IAAI,oBAAoB;AAAA,IAClD,sBAAsB,QAAQ,IAAI,wBAAwB;AAAA,IAC1D,cAAc,QAAQ,IAAI,gBAAgB;AAAA,EAC5C;AACA,SAAO;AACT;AAMO,MAAM,SAAS;AAAA,EACpB,MAAM;AAAA,EACN;AAAA,EACA,aAAa;AACf;AAaO,SAAS,kBAAgC;AAC9C,QAAM,MAAM,SAAS;AACrB,SAAO;AAAA,IACL,QAAQ,CAAC,EAAE,IAAI,oBAAoB,IAAI;AAAA,IACvC,QAAQ,CAAC,CAAC,IAAI;AAAA,IACd,UAAU,CAAC,CAAC,IAAI;AAAA,IAChB,eAAe,mBAAmB,EAAE;AAAA,EACtC;AACF;AAEO,SAAS,qBAAqB,YAAwC;AAC3E,QAAM,WAA+C;AAAA,IACnD,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,eAAe;AAAA,EACjB;AACA,SAAO,SAAS,UAAU;AAC5B;AAMO,MAAM,cAAc;AAAA,EACzB,QAAQ,aAAa,QAAQ,IAAI,oBAAoB,IAAI,GAAG,GAAG;AAAA,EAC/D,SAAS,aAAa,QAAQ,IAAI,qBAAqB,IAAI,GAAG,GAAG;AAAA,EACjE,QAAQ,aAAa,QAAQ,IAAI,oBAAoB,IAAI,GAAG,GAAG;AAAA,EAC/D,gBAAgB,aAAa,QAAQ,IAAI,iBAAiB,IAAI,GAAG,GAAG;AACtE;AAEO,MAAM,UAAU;AAAA,EACrB,YAAY;AAAA,EACZ,mBAAmB;AAAA,EACnB,mBAAmB;AACrB;AAMO,MAAM,SAAS;AAAA,EACpB,YAAY;AAAA,EACZ,oBAAoB;AAAA,EACpB,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,WAAW;AAAA,EACX,aAAa;AAAA,EACb,cAAc,CAAC,KAAM,KAAM,KAAM,MAAO,IAAK;AAC/C;AAMO,MAAM,cAAsC;AAAA,EACjD,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,IAAI;AACN;AAmCA,IAAI,wBAAgD;AAE7C,SAAS,qBAAsC;AACpD,MAAI,sBAAuB,QAAO;AAElC,QAAM,gBAAgB,CAAC,CAAC,QAAQ,IAAI,aAAa,KAAK;AACtD,QAAM,iBAAiB,CAAC,CAAC,QAAQ,IAAI,cAAc,KAAK;AACxD,QAAM,eAAe,CAAC,CAAC,QAAQ,IAAI,WAAW,KAAK;AACnD,QAAM,cAAmC,CAAC;AAE1C,MAAI,CAAC,cAAe,aAAY,KAAK,aAAa;AAClD,MAAI,CAAC,eAAgB,aAAY,KAAK,cAAc;AACpD,MAAI,CAAC,aAAc,aAAY,KAAK,WAAW;AAE/C,QAAM,aAAa,YAAY,WAAW;AAC1C,0BAAwB;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,OAAO,aACH,OACA,yBAAyB,YAAY,KAAK,IAAI,CAAC;AAAA,EACrD;AACA,SAAO;AACT;AAUO,SAAS,gCAAsC;AACpD,0BAAwB;AACxB,wBAAsB;AACtB,cAAY;AACd;AAEA,IAAI,sBAAkD;AAEtD,SAAS,mBAAwC;AAC/C,MAAI,oBAAqB,QAAO;AAEhC,QAAM,SAAS,QAAQ,IAAI,aAAa,KAAK,KAAK;AAClD,QAAM,UAAU,QAAQ,IAAI,cAAc,KAAK;AAC/C,QAAM,QAAQ,QAAQ,IAAI,WAAW,KAAK;AAC1C,QAAM,gBAAgB,QAAQ,IAAI,oBAAoB,KAAK,KAAK;AAEhE,MAAI,UAAU,CAAC,SAAS;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AACA,MAAI,UAAU,CAAC,OAAO;AACpB,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,wBAAsB;AAAA,IACpB,SAAS;AAAA,IACT,UAAU,WAAW;AAAA,IACrB,OAAO,SAAS;AAAA,IAChB,gBAAgB;AAAA,EAClB;AACA,SAAO;AACT;AAEO,MAAM,iBAAsC,IAAI,MAAM,CAAC,GAA0B;AAAA,EACtF,IAAI,SAAS,MAAc;AACzB,WAAO,iBAAiB,EAAE,IAAiC;AAAA,EAC7D;AACF,CAAC;",
"names": []
}
package/dist/src/schemas/start-research.js
CHANGED
@@ -1,7 +1,7 @@
 import { z } from "zod";
 const startResearchParamsSchema = z.object({
   goal: z.string().min(1, { message: "start-research: goal cannot be empty" }).optional().describe(
-    'Research goal for this session. When provided AND the LLM planner (LLM_API_KEY
+    'Research goal for this session. When provided AND the LLM planner is configured (LLM_API_KEY + LLM_BASE_URL + LLM_MODEL all set), the server returns a goal-tailored brief: classified goal type (spec | bug | migration | sentiment | pricing | security | synthesis | product_launch), a `primary_branch` recommendation (reddit for sentiment/migration; web for spec/bug/pricing; both when opinion-heavy AND needs official sources), the exact `first_call_sequence` of web-search + scrape-links calls to fire, 25\u201350 keyword seeds for the first `web-search` call, iteration hints, gaps to watch, and stop criteria. The goal also sets the post-sort relevance target, so state the evidence you need and what "done" means. No goal \u2192 the generic 3-tool playbook (no tailored brief). Write the goal as you would to a human researcher \u2014 one or two sentences, specific about what "done" looks like.'
   ),
   include_playbook: z.boolean().default(false).describe(
     "Include the full 3-tool research playbook (toolbelt overview, the loop, output discipline). Default false \u2014 when the LLM planner is offline the server emits a compact stub that already names the 3 tools and the loop. Pass true only if the agent needs the verbose tactic reference, or to override the degraded-mode shrink."
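A sketch of a goal that satisfies the `describe` text above, validated with the schema itself; the scenario and the import path are assumptions:

```ts
import { startResearchParamsSchema } from "mcp-researchpowerpack/dist/src/schemas/start-research.js"; // import path assumed

// Written the way the description asks: one or two sentences, addressed as if
// to a human researcher, explicit about what "done" means. Scenario invented.
const params = startResearchParamsSchema.parse({
  goal:
    "Find out whether Bun's built-in SQLite driver is production-ready; " +
    "done = at least two post-mortems or benchmarks plus the official compatibility notes.",
  include_playbook: false,
});

console.log(params.goal);

// The schema is .strict(), so unknown keys surface immediately:
// startResearchParamsSchema.parse({ gaol: "..." }) // throws ZodError
```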
package/dist/src/schemas/start-research.js.map
CHANGED
@@ -1,7 +1,7 @@
{
"version": 3,
"sources": ["../../../src/schemas/start-research.ts"],
-
"sourcesContent": ["import { z } from 'zod';\n\nexport const startResearchParamsSchema = z.object({\n goal: z\n .string()\n .min(1, { message: 'start-research: goal cannot be empty' })\n .optional()\n .describe(\n 'Research goal for this session. When provided AND the LLM planner (LLM_API_KEY
+
"sourcesContent": ["import { z } from 'zod';\n\nexport const startResearchParamsSchema = z.object({\n goal: z\n .string()\n .min(1, { message: 'start-research: goal cannot be empty' })\n .optional()\n .describe(\n 'Research goal for this session. When provided AND the LLM planner is configured (LLM_API_KEY + LLM_BASE_URL + LLM_MODEL all set), the server returns a goal-tailored brief: classified goal type (spec | bug | migration | sentiment | pricing | security | synthesis | product_launch), a `primary_branch` recommendation (reddit for sentiment/migration; web for spec/bug/pricing; both when opinion-heavy AND needs official sources), the exact `first_call_sequence` of web-search + scrape-links calls to fire, 25\u201350 keyword seeds for the first `web-search` call, iteration hints, gaps to watch, and stop criteria. The goal also sets the post-sort relevance target, so state the evidence you need and what \"done\" means. No goal \u2192 the generic 3-tool playbook (no tailored brief). Write the goal as you would to a human researcher \u2014 one or two sentences, specific about what \"done\" looks like.',\n ),\n include_playbook: z\n .boolean()\n .default(false)\n .describe(\n 'Include the full 3-tool research playbook (toolbelt overview, the loop, output discipline). Default false \u2014 when the LLM planner is offline the server emits a compact stub that already names the 3 tools and the loop. Pass true only if the agent needs the verbose tactic reference, or to override the degraded-mode shrink.',\n ),\n}).strict();\n\nexport type StartResearchParams = z.infer<typeof startResearchParamsSchema>;\n\n// `start-research` is text-only: the tool registration deliberately omits\n// `outputSchema`, and successful calls omit `structuredContent`.\nexport type StartResearchOutput = Record<string, never>;\n"],
"mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EACH,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,uCAAuC,CAAC,EAC1D,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,kBAAkB,EACf,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;",
"names": []
}
package/dist/src/schemas/web-search.js
CHANGED
@@ -1,12 +1,28 @@
 import { z } from "zod";
+const QUERY_REWRITE_PAIR_EXAMPLES = [
+  'Bad: `<feature> support` \u2192 Better: `site:<official-docs-domain> "<feature>" "<platform-or-version>"`',
+  'Bad: `<product> pricing` \u2192 Better: `site:<vendor-domain> "<product>" pricing "enterprise" OR "free tier"`',
+  'Bad: `<library> bug fix` \u2192 Better: `"<exact error text>" "<library-or-package>" "<version>" site:github.com`',
+  'Bad: `<tool> reviews` \u2192 Better: `site:reddit.com/r/<community>/comments "<tool>" "migration" OR "regression"`'
+];
+const QUERY_REWRITE_PAIR_GUIDANCE = [
+  "Write Google retrieval probes, not topic labels.",
+  "For each broad idea, rewrite it into a query that names the evidence source class, discriminating anchor terms, and one useful operator when possible.",
+  "Use rewrite-pair thinking before searching:",
+  ...QUERY_REWRITE_PAIR_EXAMPLES,
+  "Do not repeat the same noun phrase with adjectives changed; fan out by source type and evidence need."
+];
+const QUERY_REWRITE_PAIR_GUIDANCE_TEXT = QUERY_REWRITE_PAIR_GUIDANCE.join(" ");
 const webSearchParamsSchema = z.object({
   queries: z.array(
-    z.string().min(1, { message: "web-search: Query cannot be empty" }).describe(
+    z.string().min(1, { message: "web-search: Query cannot be empty" }).describe(
+      `A single Google search query. Each query runs as a separate parallel search. ${QUERY_REWRITE_PAIR_GUIDANCE_TEXT}`
+    )
   ).min(1, { message: "web-search: At least 1 query required" }).describe(
-
+    `Search queries to run in parallel via Google. ${QUERY_REWRITE_PAIR_GUIDANCE_TEXT} Think of queries as **concept groups** \u2014 clusters of semantically distinct facets of your research goal, each probing a DIFFERENT angle (official spec, implementation, failures, comparison, sentiment, changelog, CVE, pricing). Fire all groups in ONE call as a flat array. Overlapping queries waste budget; orthogonal facets multiply coverage. A narrow bug needs 10\u201320 queries across 2\u20133 facets; a comparison needs 25\u201335 across 4\u20136 facets; open-ended synthesis needs 40\u201380 across 8+ facets.`
   ),
   extract: z.string().min(1, { message: "web-search: extract cannot be empty" }).describe(
-    'Semantic instruction for the relevance classifier \u2014 what "relevant" means for THIS goal. Drives tiering (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER), synthesis, gap analysis, and refine-query suggestions. Be specific: "OAuth 2.1 support in TypeScript MCP frameworks \u2014 runnable code, not marketing", not "MCP OAuth".
+    'Semantic instruction for the relevance classifier \u2014 what "relevant" means for THIS goal. This is the post-sort target, so name the evidence you need and the source-of-truth expectation: e.g. official docs/release notes for specs, issue/PR/error text for bugs, Reddit/HN/blogs for lived experience, vendor pricing pages for pricing, CVE databases for security. Drives tiering (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER), synthesis, gap analysis, and refine-query suggestions. Be specific: "OAuth 2.1 support in TypeScript MCP frameworks \u2014 runnable code, not marketing", not "MCP OAuth".'
   ),
   raw: z.boolean().default(false).describe("Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results."),
   scope: z.enum(["web", "reddit", "both"]).default("web").describe(
@@ -67,6 +83,9 @@ const webSearchOutputSchema = z.object({
 }).strict()
 }).strict();
 export {
+  QUERY_REWRITE_PAIR_EXAMPLES,
+  QUERY_REWRITE_PAIR_GUIDANCE,
+  QUERY_REWRITE_PAIR_GUIDANCE_TEXT,
   webSearchOutputSchema,
   webSearchParamsSchema
 };
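To illustrate the sizing guidance in the new `queries` description, here is an invented narrow-bug fan-out, abbreviated to six queries across three facets; the bug scenario and the import path are assumptions:

```ts
import { webSearchParamsSchema } from "mcp-researchpowerpack/dist/src/schemas/web-search.js"; // import path assumed

// Invented scenario: "better-sqlite3" failing to load on Node 22. A real call
// would use 10-20 queries per the guidance; abbreviated to six here.
const params = webSearchParamsSchema.parse({
  queries: [
    // Facet 1: exact failure signature
    '"ERR_DLOPEN_FAILED" "better-sqlite3" "node 22" site:github.com',
    '"was compiled against a different Node.js version" "better-sqlite3"',
    // Facet 2: maintainer-side fixes and release notes
    'site:github.com "better-sqlite3" releases "prebuilds" "node 22"',
    '"better-sqlite3" changelog "NODE_MODULE_VERSION"',
    // Facet 3: community workarounds
    'site:reddit.com/r/node/comments "better-sqlite3" "rebuild"',
    'site:stackoverflow.com "better-sqlite3" "node-gyp" rebuild error',
  ],
  extract:
    "Fixes for better-sqlite3 failing to load on Node 22: maintainer guidance, " +
    "release notes, or reproduced workarounds, not generic install tutorials",
  scope: "web",
});

console.log(params.queries.length, "queries in one call");
```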
package/dist/src/schemas/web-search.js.map
CHANGED
@@ -1,7 +1,7 @@
{
"version": 3,
"sources": ["../../../src/schemas/web-search.ts"],
-
"sourcesContent": ["import { z } from 'zod';\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .describe(
-
"mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD,
+
"sourcesContent": ["import { z } from 'zod';\n\nexport const QUERY_REWRITE_PAIR_EXAMPLES = [\n 'Bad: `<feature> support` \u2192 Better: `site:<official-docs-domain> \"<feature>\" \"<platform-or-version>\"`',\n 'Bad: `<product> pricing` \u2192 Better: `site:<vendor-domain> \"<product>\" pricing \"enterprise\" OR \"free tier\"`',\n 'Bad: `<library> bug fix` \u2192 Better: `\"<exact error text>\" \"<library-or-package>\" \"<version>\" site:github.com`',\n 'Bad: `<tool> reviews` \u2192 Better: `site:reddit.com/r/<community>/comments \"<tool>\" \"migration\" OR \"regression\"`',\n] as const;\n\nexport const QUERY_REWRITE_PAIR_GUIDANCE = [\n 'Write Google retrieval probes, not topic labels.',\n 'For each broad idea, rewrite it into a query that names the evidence source class, discriminating anchor terms, and one useful operator when possible.',\n 'Use rewrite-pair thinking before searching:',\n ...QUERY_REWRITE_PAIR_EXAMPLES,\n 'Do not repeat the same noun phrase with adjectives changed; fan out by source type and evidence need.',\n] as const;\n\nexport const QUERY_REWRITE_PAIR_GUIDANCE_TEXT = QUERY_REWRITE_PAIR_GUIDANCE.join(' ');\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .describe(\n `A single Google search query. Each query runs as a separate parallel search. ${QUERY_REWRITE_PAIR_GUIDANCE_TEXT}`,\n ),\n )\n .min(1, { message: 'web-search: At least 1 query required' })\n .describe(\n `Search queries to run in parallel via Google. ${QUERY_REWRITE_PAIR_GUIDANCE_TEXT} Think of queries as **concept groups** \u2014 clusters of semantically distinct facets of your research goal, each probing a DIFFERENT angle (official spec, implementation, failures, comparison, sentiment, changelog, CVE, pricing). Fire all groups in ONE call as a flat array. Overlapping queries waste budget; orthogonal facets multiply coverage. A narrow bug needs 10\u201320 queries across 2\u20133 facets; a comparison needs 25\u201335 across 4\u20136 facets; open-ended synthesis needs 40\u201380 across 8+ facets.`,\n ),\n extract: z\n .string()\n .min(1, { message: 'web-search: extract cannot be empty' })\n .describe(\n 'Semantic instruction for the relevance classifier \u2014 what \"relevant\" means for THIS goal. This is the post-sort target, so name the evidence you need and the source-of-truth expectation: e.g. official docs/release notes for specs, issue/PR/error text for bugs, Reddit/HN/blogs for lived experience, vendor pricing pages for pricing, CVE databases for security. Drives tiering (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER), synthesis, gap analysis, and refine-query suggestions. Be specific: \"OAuth 2.1 support in TypeScript MCP frameworks \u2014 runnable code, not marketing\", not \"MCP OAuth\".',\n ),\n raw: z\n .boolean()\n .default(false)\n .describe('Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.'),\n scope: z\n .enum(['web', 'reddit', 'both'])\n .default('web')\n .describe(\n 'Search scope. \"web\" (default) = open web, no augmentation. \"reddit\" = server appends `site:reddit.com` to every query and filters results to post permalinks (`/r/.+/comments/[a-z0-9]+/`); subreddit homepages are dropped. \"both\" = runs every query twice (open web + reddit-scoped), merges the result set, and tags each row with its source. 
Use \"reddit\" for sentiment/migration/lived-experience research; use \"both\" when you want one call to cover both branches.',\n ),\n verbose: z\n .boolean()\n .default(false)\n .describe(\n 'Include the per-row scoring/coverage metadata, the trailing Signals block, and the CONSENSUS labels even when they carry little signal (single-query hits, threshold of 1). Default false \u2014 most agents do not need this and it costs ~1.5KB per call on a typical 3-query fan-out.',\n ),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n content: z\n .string()\n .describe(\n 'Rendered search report, including ranked URLs, classification synthesis, gaps, and follow-up searches. Duplicates the MCP content text for clients that only expose structuredContent.',\n ),\n results: z\n .array(z.object({\n rank: z.number().int().positive().describe('1-based rank in the merged ranking.'),\n url: z.string().describe('Result URL.'),\n title: z.string().describe('Page title from the result.'),\n snippet: z.string().describe('Search snippet from the result.'),\n source_type: z\n .enum(['reddit', 'github', 'docs', 'blog', 'paper', 'qa', 'cve', 'news', 'video', 'web'])\n .describe(\n 'Heuristic source kind from the URL. When the LLM classifier is online its tag overrides this.',\n ),\n score: z.number().describe('Composite CTR-weighted score, normalized to 100.'),\n seen_in: z.number().int().nonnegative().describe('Number of input queries this URL appeared in.'),\n best_position: z.number().int().nonnegative().describe('Best (lowest) SERP position observed.'),\n }))\n .optional()\n .describe('Per-result structured payload \u2014 same data the markdown table renders, machine-readable.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n llm_classified: z.boolean().describe('Whether LLM classification was applied.'),\n llm_error: z.string().optional().describe('LLM error if classification failed and fell back to raw.'),\n scope: z.enum(['web', 'reddit', 'both']).optional().describe('Search scope used.'),\n coverage_summary: z\n .array(z.object({\n query: z.string().describe('The search query.'),\n result_count: z.number().int().nonnegative().describe('Results returned for this query.'),\n top_url: z.string().optional().describe('Domain of the top result.'),\n }))\n .optional()\n .describe('Per-query result counts and top URLs.'),\n low_yield_queries: z\n .array(z.string())\n .optional()\n .describe('Queries that produced 0-1 results.'),\n query_rewrites: z\n .array(z.object({\n original: z.string().describe('The query as the agent submitted it.'),\n rewritten: z.string().describe('The query as dispatched to Google after Phase A normalization.'),\n rules: z.array(z.string()).describe('Rule ids applied (A1=operator-char de-quote, A2=path/URL de-quote, A3=phrase-AND collapse).'),\n }))\n .optional()\n .describe('Pre-dispatch query rewrites \u2014 Phase A normalizations (operator-char and path/URL de-quote, phrase-AND \u2192 anchor + OR collapse).'),\n retried_queries: z\n .array(z.object({\n original: z.string().describe('The query as dispatched (post-Phase-A) that returned 0 results.'),\n retried_with: 
z.string().describe('The relaxed form retried after the empty initial response.'),\n rules: z.array(z.string()).describe('Rule ids applied (B1=strip all quotes, B2=drop site: filter).'),\n recovered_results: z.number().int().nonnegative().describe('How many hits the retry produced; 0 means the retry also failed.'),\n }))\n .optional()\n .describe('On-empty retries \u2014 Phase B relaxations applied after the initial Serper batch returned 0 results for a query.'),\n retry_error: z\n .object({\n phase: z.literal('relax-retry').describe('Retry phase that failed after the initial batch succeeded.'),\n code: z.string().describe('Structured error code from the retry batch.'),\n message: z.string().describe('Provider error message from the retry batch.'),\n retryable: z.boolean().describe('Whether the retry-batch provider failure is retryable.'),\n statusCode: z.number().int().optional().describe('Provider status code when available.'),\n })\n .optional()\n .describe('Non-fatal failure from the relaxed retry batch; initial search results were preserved.'),\n }).strict(),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
+
"mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,8BAA8B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,MAAM,8BAA8B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA,GAAG;AAAA,EACH;AACF;AAEO,MAAM,mCAAmC,4BAA4B,KAAK,GAAG;AAE7E,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD;AAAA,MACC,gFAAgF,gCAAgC;AAAA,IAClH;AAAA,EACJ,EACC,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC,iDAAiD,gCAAgC;AAAA,EACnF;AAAA,EACF,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,sCAAsC,CAAC,EACzD;AAAA,IACC;AAAA,EACF;AAAA,EACF,KAAK,EACF,QAAQ,EACR,QAAQ,KAAK,EACb,SAAS,oGAAoG;AAAA,EAChH,OAAO,EACJ,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAC9B,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN,OAAO,EACP;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,MAAM,EAAE,OAAO;AAAA,IACd,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,qCAAqC;AAAA,IAChF,KAAK,EAAE,OAAO,EAAE,SAAS,aAAa;AAAA,IACtC,OAAO,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IACxD,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,IAC9D,aAAa,EACV,KAAK,CAAC,UAAU,UAAU,QAAQ,QAAQ,SAAS,MAAM,OAAO,QAAQ,SAAS,KAAK,CAAC,EACvF;AAAA,MACC;AAAA,IACF;AAAA,IACF,OAAO,EAAE,OAAO,EAAE,SAAS,kDAAkD;AAAA,IAC7E,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,+CAA+C;AAAA,IAChG,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,uCAAuC;AAAA,EAChG,CAAC,CAAC,EACD,SAAS,EACT,SAAS,8FAAyF;AAAA,EACrG,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,gBAAgB,EAAE,QAAQ,EAAE,SAAS,yCAAyC;AAAA,IAC9E,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0DAA0D;AAAA,IACpG,OAAO,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS,EAAE,SAAS,oBAAoB;AAAA,IACjF,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,OAAO,EAAE,OAAO,EAAE,SAAS,mBAAmB;AAAA,MAC9C,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,MACxF,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,2BAA2B;AAAA,IACrE,CAAC,CAAC,EACD,SAAS,EACT,SAAS,uCAAuC;AAAA,IACnD,mBAAmB,EAChB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,oCAAoC;AAAA,IAChD,gBAAgB,EACb,MAAM,EAAE,OAAO;AAAA,MACd,UAAU,EAAE,OAAO,EAAE,SAAS,sCAAsC;AAAA,MACpE,WAAW,EAAE,OAAO,EAAE,SAAS,gEAAgE;AAAA,MAC/F,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,6FAA6F;AAAA,IACnI,CAAC,CAAC,EACD,SAAS,EACT,SAAS,0IAAgI;AAAA,IAC5I,iBAAiB,EACd,MAAM,EAAE,OAAO;AAAA,MACd,UAAU,EAAE,OAAO,EAAE,SAAS,iEAAiE;AAAA,MAC/F,cAAc,EAAE,OAAO,EAAE,SAAS,4DAA4D;AAAA,MAC9F,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,+DAA+D;AAAA,MACnG,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kEAAkE;AAAA,IAC/H,CAAC,CAAC,EACD,SAAS,EACT,SAAS,oHAA+G;AAAA,IAC3H,aAAa,EACV,OAAO;AAAA,MACN,OAAO,EAAE,QAAQ,aAAa,EAAE,SAAS,4DAA4D;AAAA,MACrG,MAAM,EAAE,OAAO,EAAE,SAAS,6CAA6C;AAAA,MACvE,SAAS,EAAE,OAAO,EAAE,SAAS,8CAA8C;AAAA,MAC3E,WAAW,EAAE,QAAQ,EAAE,SAAS,wDAAwD;AAAA,MACxF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,sCAAsC;AAAA,IACzF,CAAC,EACA,SAAS,EACT,SAAS,wFAAwF;AAAA,EACtG,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
"names": []
}
package/dist/src/services/llm-processor.js
CHANGED
@@ -1,5 +1,6 @@
 import OpenAI from "openai";
 import { LLM_EXTRACTION, getCapabilities } from "../config/index.js";
+import { QUERY_REWRITE_PAIR_GUIDANCE_TEXT } from "../schemas/web-search.js";
 import {
   classifyError,
   sleep,
@@ -408,6 +409,7 @@ ${truncatedContent}`;
     } catch (err) {
       lastError = classifyError(err);
       mcpLog("error", `Fallback error (attempt ${attempt + 1}): ${lastError.message}`, "llm");
+      if (isContextWindowError(err) || !isRetryableLLMError(err)) break;
     }
   }
 }
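The added `break` guard is easier to see outside the diff. Below is a condensed sketch of the pattern, not the package's actual loop; `isContextWindowError` and `isRetryableLLMError` are stand-ins with signatures assumed from the diff:

```ts
// Stand-in declarations: signatures assumed from the diff hunk above.
declare function isContextWindowError(err: unknown): boolean;
declare function isRetryableLLMError(err: unknown): boolean;

async function callWithFallback(
  attemptCall: () => Promise<string>,
  maxAttempts = 3,
): Promise<string> {
  let lastError: unknown;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return await attemptCall();
    } catch (err) {
      lastError = err;
      console.error(`Fallback error (attempt ${attempt + 1})`);
      // The new guard: an oversized prompt or a non-retryable provider error
      // will fail identically on every attempt, so stop burning retries now.
      if (isContextWindowError(err) || !isRetryableLLMError(err)) break;
    }
  }
  throw lastError;
}
```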
@@ -690,7 +692,10 @@ first_call_sequence:
 
 keyword_seeds:
 - 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.
--
+- Write Google retrieval probes, not topic labels.
+- For each broad idea, first do a bad \u2192 better rewrite in your head: replace a vague phrase with a query that names the evidence source class, discriminating anchor terms, and one useful operator when possible.
+- ${QUERY_REWRITE_PAIR_GUIDANCE_TEXT}
+- Use operators where helpful (site:, quotes, verbatim version numbers, exact error text, package names, release/version strings).
 - DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.
 - Do NOT invent vendor names you are uncertain exist.
 - For \`site:<domain>\` filters, ONLY use domains you are highly confident are real. Safe choices: \`github.com\`, \`stackoverflow.com\`, \`reddit.com\`, \`news.ycombinator.com\`, \`arxiv.org\`, \`nvd.nist.gov\`, \`pypi.org\`, \`npmjs.com\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names "Cursor" \u2192 \`cursor.com\`/\`docs.cursor.com\` is acceptable). If you don't know the product's real docs domain, leave the query open (no \`site:\`) instead of guessing.
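An invented set of keyword seeds that follows the rules in the prompt above: retrieval probes, diverse facets, and `site:` filters limited to the whitelisted domains. The research topic is made up for illustration:

```ts
// Hypothetical goal: "pgvector HNSW recall regressions after an upgrade".
const keywordSeeds: string[] = [
  // Failure signature on a whitelisted domain
  '"pgvector" "HNSW" recall regression site:github.com',
  // Tuning parameters as exact anchor terms
  '"pgvector" "hnsw" "ef_search" benchmark recall',
  // Community sentiment, permalink-scoped
  'site:reddit.com/r/PostgreSQL/comments "pgvector" "recall"',
  'site:news.ycombinator.com "pgvector" "HNSW"',
  // Release evidence with a verbatim version string
  '"pgvector" changelog "0.7.0" release notes',
  // Open query, no site: filter; per the rules, don't guess a docs domain.
  'pgvector hnsw index build parameters recall tradeoff',
];

console.log(keywordSeeds.length, "seeds, no two repeating the same noun phrase");
```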