mcp-researchpowerpack 6.0.3 → 6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -8
- package/dist/index.js +2 -2
- package/dist/index.js.map +2 -2
- package/dist/mcp-use.json +2 -2
- package/dist/src/config/index.js +9 -28
- package/dist/src/config/index.js.map +2 -2
- package/dist/src/services/llm-processor.js +130 -56
- package/dist/src/services/llm-processor.js.map +2 -2
- package/dist/src/tools/search.js +1 -1
- package/dist/src/tools/search.js.map +1 -1
- package/package.json +1 -2
package/README.md
CHANGED
|
@@ -72,14 +72,14 @@ Copy `.env.example`, set only what you need. Missing keys don't crash the server
|
|
|
72
72
|
|
|
73
73
|
### llm (AI extraction + classification)
|
|
74
74
|
|
|
75
|
-
Any OpenAI-compatible endpoint
|
|
75
|
+
Any OpenAI-compatible endpoint. `LLM_API_KEY`, `LLM_BASE_URL`, and `LLM_MODEL` are all required together. Reasoning effort is always `low`.
|
|
76
76
|
|
|
77
77
|
| var | required? | |
|
|
78
78
|
|-----|-----------|---|
|
|
79
|
-
| `LLM_API_KEY` | yes
|
|
80
|
-
| `LLM_BASE_URL` | yes
|
|
81
|
-
| `LLM_MODEL` | yes
|
|
82
|
-
| `
|
|
79
|
+
| `LLM_API_KEY` | yes | API key for the endpoint |
|
|
80
|
+
| `LLM_BASE_URL` | yes | base URL for the OpenAI-compatible endpoint (e.g. `https://server.up.railway.app/v1`) |
|
|
81
|
+
| `LLM_MODEL` | yes | primary model (e.g. `gpt-5.4-mini`) |
|
|
82
|
+
| `LLM_FALLBACK_MODEL` | no | model to use after primary exhausts all retries — gets 3 additional attempts (e.g. `gpt-5.4`) |
|
|
83
83
|
| `LLM_CONCURRENCY` | no (default `50`) | parallel LLM calls |
|
|
84
84
|
|
|
85
85
|
### evals
|
|
@@ -108,9 +108,10 @@ pnpm inspect # mcp-use inspector
|
|
|
108
108
|
|
|
109
109
|
## deploy
|
|
110
110
|
|
|
111
|
+
Deploy to Manufact Cloud via the `mcp-use` CLI (GitHub-backed):
|
|
112
|
+
|
|
111
113
|
```bash
|
|
112
|
-
pnpm
|
|
113
|
-
pnpm deploy # manufact cloud
|
|
114
|
+
pnpm deploy # runs: mcp-use deploy --org <your-org>
|
|
114
115
|
```
|
|
115
116
|
|
|
116
117
|
Or self-host anywhere with Node 20.19+ / 22.12+:
|
|
@@ -135,7 +136,7 @@ src/
|
|
|
135
136
|
mcp-helpers.ts response builders (markdown + structured MCP output)
|
|
136
137
|
utils.ts shared formatters
|
|
137
138
|
services/
|
|
138
|
-
llm-processor.ts AI extraction, classification, brief generation
|
|
139
|
+
llm-processor.ts AI extraction, classification, brief generation — primary + fallback model, always low reasoning
|
|
139
140
|
markdown-cleaner.ts HTML/markdown cleanup
|
|
140
141
|
schemas/ zod v4 input validation per tool
|
|
141
142
|
utils/
|
package/dist/index.js
CHANGED
|
@@ -177,8 +177,8 @@ async function main() {
|
|
|
177
177
|
research_powerpack: {
|
|
178
178
|
planner_available: llm.plannerConfigured,
|
|
179
179
|
extractor_available: llm.extractorConfigured,
|
|
180
|
-
planner_model: process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? null,
|
|
181
|
-
extractor_model: process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? null
|
|
180
|
+
planner_model: process.env.LLM_MODEL ?? null,
|
|
181
|
+
extractor_model: process.env.LLM_MODEL ?? null
|
|
182
182
|
}
|
|
183
183
|
}
|
|
184
184
|
});
|
package/dist/index.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../index.ts"],
|
|
4
|
-
"sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack-http' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? 
'', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n // Distinguish \"never probed\" (checkedAt === null) from \"probed and failed\"\n // (checkedAt set, ok=false). The raw `lastPlannerOk` defaults to `false`\n // at startup, which would mislead operators into thinking the LLM is\n // broken before it has been exercised once.\n const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;\n const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? 
null : llm.lastExtractorOk;\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: plannerOkForHealth,\n llm_extractor_ok: extractorOkForHealth,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n // Counter surfacing lets operators diagnose gate behavior from outside\n // the process (see src/tools/start-research.ts for the gate semantics).\n consecutive_planner_failures: llm.consecutivePlannerFailures,\n consecutive_extractor_failures: llm.consecutiveExtractorFailures,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. 
' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model:\n process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? null,\n extractor_model:\n process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? 
null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? `http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n 
clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
|
|
5
|
-
"mappings": ";AAGA,IAAI,CAAC,QAAQ,IAAI,oBAAoB;AACnC,UAAQ,IAAI,qBAAqB;AACnC;AAEA,SAAS,cAAc;AACvB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AAEP,SAAS,cAAc;AACvB,SAAS,oBAAoB;AAC7B,SAAS,wBAAwB;AAEjC,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAC5B,MAAM,cAAc;AACpB,MAAM,qBAAqB;AAI3B,MAAM,gBAAgB,OAAO,IAAI,SAAS;AAE1C,SAAS,YAAY,OAAiD;AACpE,MAAI,CAAC,MAAO,QAAO;AAEnB,QAAM,QAAQ,MACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AAEjB,SAAO,MAAM,SAAS,IAAI,QAAQ;AACpC;AAEA,SAAS,UAAU,OAA2B,UAA0B;AACtE,QAAM,SAAS,OAAO,SAAS,SAAS,IAAI,EAAE;AAC9C,MAAI,OAAO,SAAS,MAAM,KAAK,SAAS,GAAG;AACzC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,cAAsB;AAC7B,QAAM,gBAAgB,QAAQ,KAAK,UAAU,CAAC,QAAQ,QAAQ,QAAQ;AACtE,MAAI,iBAAiB,GAAG;AACtB,WAAO,UAAU,QAAQ,KAAK,gBAAgB,CAAC,GAAG,YAAY;AAAA,EAChE;AAEA,SAAO,UAAU,QAAQ,IAAI,MAAM,YAAY;AACjD;AAEA,SAAS,cAAsB;AAC7B,QAAM,eAAe,QAAQ,IAAI,MAAM,KAAK;AAC5C,MAAI,cAAc;AAChB,WAAO;AAAA,EACT;AAGA,MAAI,QAAQ,IAAI,MAAM,KAAK,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,UAAU,gBAA4D;AAC7E,MAAI,CAAC,kBAAkB,eAAe,WAAW,GAAG;AAClD,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,cAAc,CAAC,OAAO,QAAQ,QAAQ,OAAO,UAAU,SAAS;AAAA,IAChE,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,eAAe,CAAC,gBAAgB;AAAA,EAClC;AACF;AAEA,SAAS,mBAAyB;AAChC,SAAO,UAAU;AAAA,IACf,OAAO,QAAQ,IAAI,aAAa,eAAe,SAAS;AAAA,IACxD,QAAQ;AAAA,EACV,CAAC;AAED,QAAM,QAAQ,QAAQ,IAAI,OAAO,KAAK;AACtC,MAAI,UAAU,KAAK;AACjB,WAAO,SAAS,CAAC;AAAA,EACnB,WAAW,OAAO;AAChB,WAAO,SAAS,CAAC;AAAA,EACnB;AACF;AAEA,SAAS,gBAAgB,OAAe,SAAyB;AAC/D,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,EAAE;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,GAAG,OAAO,wDAAwD,KAAK,EAAE;AAAA,EAC3F;AACF;AAEA,SAAS,wBAA8C;AACrD,QAAM,kBAAkB,YAAY,QAAQ,IAAI,eAAe;AAC/D,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,WAAO,gBAAgB,IAAI,YAAU,gBAAgB,QAAQ,iBAAiB,CAAC;AAAA,EACjF;AAEA,SAAO;AACT;AAEA,SAAS,qBAGP;AACA,SAAO;AAAA,IACL,eAAe;AAAA,MACb,cAAc,IAAI,qBAAqB;AAAA,MACvC,eAAe,IAAI,sBAAsB;AAAA,IAC3C;AAAA,IACA,YAAY,CAAC;AAAA,EACf;AACF;AAEA,SAAS,mBAAmB,QAAmB,WAAmB;AAChE,QAAM,MAAM,a
AAa;AAKzB,QAAM,qBAAqB,IAAI,yBAAyB,OAAO,OAAO,IAAI;AAC1E,QAAM,uBAAuB,IAAI,2BAA2B,OAAO,OAAO,IAAI;AAC9E,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,MAAM,OAAO;AAAA,IACb,SAAS,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,gBAAgB,KAAK,OAAO,KAAK,IAAI,IAAI,aAAa,GAAI;AAAA,IAC1D,iBAAiB,OAAO,kBAAkB,EAAE;AAAA,IAC5C,gBAAgB;AAAA,IAChB,kBAAkB;AAAA,IAClB,wBAAwB,IAAI;AAAA,IAC5B,0BAA0B,IAAI;AAAA,IAC9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,oBAAoB,IAAI;AAAA,IACxB,sBAAsB,IAAI;AAAA;AAAA;AAAA,IAG1B,8BAA8B,IAAI;AAAA,IAClC,gCAAgC,IAAI;AAAA,IACpC,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,EACpC;AACF;AAEA,eAAe,OAAsB;AACnC,mBAAiB;AAEjB,QAAM,eAAe,QAAQ,IAAI,aAAa;AAC9C,QAAM,OAAO,YAAY;AACzB,QAAM,OAAO,YAAY;AACzB,QAAM,UAAU,QAAQ,IAAI,SAAS,KAAK,KAAK;AAC/C,QAAM,iBAAiB,sBAAsB;AAE7C,QAAM,EAAE,eAAe,WAAW,IAAI,mBAAmB;AAEzD,gBAAc,KAAK,YAAY,OAAO,IAAI,KAAK,OAAO,OAAO,EAAE;AAC/D,gBAAc,KAAK,0BAA0B,IAAI,IAAI,IAAI,EAAE;AAC3D,MAAI,kBAAkB,eAAe,SAAS,GAAG;AAC/C,kBAAc,KAAK,wCAAwC,eAAe,KAAK,IAAI,CAAC,EAAE;AAAA,EACxF,WAAW,cAAc;AACvB,QAAI,CAAC,SAAS;AACZ,oBAAc;AAAA,QACZ;AAAA,MAGF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,kBAAc;AAAA,MACZ;AAAA,IAEF;AAAA,EACF,OAAO;AACL,kBAAc,KAAK,gDAAgD;AAAA,EACrE;AAEA,QAAM,SAAS,IAAI,UAAU;AAAA,IAC3B,MAAM,OAAO;AAAA,IACb,OAAO;AAAA,IACP,SAAS,OAAO;AAAA,IAChB,aAAa,OAAO;AAAA,IACpB,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA,MAAM,UAAU,cAAc;AAAA,IAC9B;AAAA,IACA,GAAG;AAAA,EACL,CAAC;AAED,mBAAiB,MAAM;AASvB,MAAI;AAGF,UAAM,UAAU;AAChB,UAAM,WAAW,QAAQ,qBAAqB,KAAK,MAAM;AACzD,QAAI,UAAU;AACZ,cAAQ,sBAAsB,CAAC,cAA+B;AAC5D,cAAM,SAAS,SAAS,SAAS;AACjC,YAAI;AACF,gBAAM,MAAM,aAAa;AACzB,iBAAO,QAAQ,uBAAuB;AAAA,YACpC,cAAc;AAAA,cACZ,oBAAoB;AAAA,gBAClB,mBAAmB,IAAI;AAAA,gBACvB,qBAAqB,IAAI;AAAA,gBACzB,
|
|
4
|
+
"sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack-http' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? 
'', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n // Distinguish \"never probed\" (checkedAt === null) from \"probed and failed\"\n // (checkedAt set, ok=false). The raw `lastPlannerOk` defaults to `false`\n // at startup, which would mislead operators into thinking the LLM is\n // broken before it has been exercised once.\n const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;\n const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? 
null : llm.lastExtractorOk;\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: plannerOkForHealth,\n llm_extractor_ok: extractorOkForHealth,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n // Counter surfacing lets operators diagnose gate behavior from outside\n // the process (see src/tools/start-research.ts for the gate semantics).\n consecutive_planner_failures: llm.consecutivePlannerFailures,\n consecutive_extractor_failures: llm.consecutiveExtractorFailures,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. 
' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model: process.env.LLM_MODEL ?? null,\n extractor_model: process.env.LLM_MODEL ?? 
null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? `http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n 
clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
|
|
5
|
+
"mappings": ";AAGA,IAAI,CAAC,QAAQ,IAAI,oBAAoB;AACnC,UAAQ,IAAI,qBAAqB;AACnC;AAEA,SAAS,cAAc;AACvB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AAEP,SAAS,cAAc;AACvB,SAAS,oBAAoB;AAC7B,SAAS,wBAAwB;AAEjC,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAC5B,MAAM,cAAc;AACpB,MAAM,qBAAqB;AAI3B,MAAM,gBAAgB,OAAO,IAAI,SAAS;AAE1C,SAAS,YAAY,OAAiD;AACpE,MAAI,CAAC,MAAO,QAAO;AAEnB,QAAM,QAAQ,MACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AAEjB,SAAO,MAAM,SAAS,IAAI,QAAQ;AACpC;AAEA,SAAS,UAAU,OAA2B,UAA0B;AACtE,QAAM,SAAS,OAAO,SAAS,SAAS,IAAI,EAAE;AAC9C,MAAI,OAAO,SAAS,MAAM,KAAK,SAAS,GAAG;AACzC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,cAAsB;AAC7B,QAAM,gBAAgB,QAAQ,KAAK,UAAU,CAAC,QAAQ,QAAQ,QAAQ;AACtE,MAAI,iBAAiB,GAAG;AACtB,WAAO,UAAU,QAAQ,KAAK,gBAAgB,CAAC,GAAG,YAAY;AAAA,EAChE;AAEA,SAAO,UAAU,QAAQ,IAAI,MAAM,YAAY;AACjD;AAEA,SAAS,cAAsB;AAC7B,QAAM,eAAe,QAAQ,IAAI,MAAM,KAAK;AAC5C,MAAI,cAAc;AAChB,WAAO;AAAA,EACT;AAGA,MAAI,QAAQ,IAAI,MAAM,KAAK,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,UAAU,gBAA4D;AAC7E,MAAI,CAAC,kBAAkB,eAAe,WAAW,GAAG;AAClD,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,cAAc,CAAC,OAAO,QAAQ,QAAQ,OAAO,UAAU,SAAS;AAAA,IAChE,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,eAAe,CAAC,gBAAgB;AAAA,EAClC;AACF;AAEA,SAAS,mBAAyB;AAChC,SAAO,UAAU;AAAA,IACf,OAAO,QAAQ,IAAI,aAAa,eAAe,SAAS;AAAA,IACxD,QAAQ;AAAA,EACV,CAAC;AAED,QAAM,QAAQ,QAAQ,IAAI,OAAO,KAAK;AACtC,MAAI,UAAU,KAAK;AACjB,WAAO,SAAS,CAAC;AAAA,EACnB,WAAW,OAAO;AAChB,WAAO,SAAS,CAAC;AAAA,EACnB;AACF;AAEA,SAAS,gBAAgB,OAAe,SAAyB;AAC/D,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,EAAE;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,GAAG,OAAO,wDAAwD,KAAK,EAAE;AAAA,EAC3F;AACF;AAEA,SAAS,wBAA8C;AACrD,QAAM,kBAAkB,YAAY,QAAQ,IAAI,eAAe;AAC/D,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,WAAO,gBAAgB,IAAI,YAAU,gBAAgB,QAAQ,iBAAiB,CAAC;AAAA,EACjF;AAEA,SAAO;AACT;AAEA,SAAS,qBAGP;AACA,SAAO;AAAA,IACL,eAAe;AAAA,MACb,cAAc,IAAI,qBAAqB;AAAA,MACvC,eAAe,IAAI,sBAAsB;AAAA,IAC3C;AAAA,IACA,YAAY,CAAC;AAAA,EACf;AACF;AAEA,SAAS,mBAAmB,QAAmB,WAAmB;AAChE,QAAM,MAAM,a
AAa;AAKzB,QAAM,qBAAqB,IAAI,yBAAyB,OAAO,OAAO,IAAI;AAC1E,QAAM,uBAAuB,IAAI,2BAA2B,OAAO,OAAO,IAAI;AAC9E,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,MAAM,OAAO;AAAA,IACb,SAAS,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,gBAAgB,KAAK,OAAO,KAAK,IAAI,IAAI,aAAa,GAAI;AAAA,IAC1D,iBAAiB,OAAO,kBAAkB,EAAE;AAAA,IAC5C,gBAAgB;AAAA,IAChB,kBAAkB;AAAA,IAClB,wBAAwB,IAAI;AAAA,IAC5B,0BAA0B,IAAI;AAAA,IAC9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,oBAAoB,IAAI;AAAA,IACxB,sBAAsB,IAAI;AAAA;AAAA;AAAA,IAG1B,8BAA8B,IAAI;AAAA,IAClC,gCAAgC,IAAI;AAAA,IACpC,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,EACpC;AACF;AAEA,eAAe,OAAsB;AACnC,mBAAiB;AAEjB,QAAM,eAAe,QAAQ,IAAI,aAAa;AAC9C,QAAM,OAAO,YAAY;AACzB,QAAM,OAAO,YAAY;AACzB,QAAM,UAAU,QAAQ,IAAI,SAAS,KAAK,KAAK;AAC/C,QAAM,iBAAiB,sBAAsB;AAE7C,QAAM,EAAE,eAAe,WAAW,IAAI,mBAAmB;AAEzD,gBAAc,KAAK,YAAY,OAAO,IAAI,KAAK,OAAO,OAAO,EAAE;AAC/D,gBAAc,KAAK,0BAA0B,IAAI,IAAI,IAAI,EAAE;AAC3D,MAAI,kBAAkB,eAAe,SAAS,GAAG;AAC/C,kBAAc,KAAK,wCAAwC,eAAe,KAAK,IAAI,CAAC,EAAE;AAAA,EACxF,WAAW,cAAc;AACvB,QAAI,CAAC,SAAS;AACZ,oBAAc;AAAA,QACZ;AAAA,MAGF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,kBAAc;AAAA,MACZ;AAAA,IAEF;AAAA,EACF,OAAO;AACL,kBAAc,KAAK,gDAAgD;AAAA,EACrE;AAEA,QAAM,SAAS,IAAI,UAAU;AAAA,IAC3B,MAAM,OAAO;AAAA,IACb,OAAO;AAAA,IACP,SAAS,OAAO;AAAA,IAChB,aAAa,OAAO;AAAA,IACpB,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA,MAAM,UAAU,cAAc;AAAA,IAC9B;AAAA,IACA,GAAG;AAAA,EACL,CAAC;AAED,mBAAiB,MAAM;AASvB,MAAI;AAGF,UAAM,UAAU;AAChB,UAAM,WAAW,QAAQ,qBAAqB,KAAK,MAAM;AACzD,QAAI,UAAU;AACZ,cAAQ,sBAAsB,CAAC,cAA+B;AAC5D,cAAM,SAAS,SAAS,SAAS;AACjC,YAAI;AACF,gBAAM,MAAM,aAAa;AACzB,iBAAO,QAAQ,uBAAuB;AAAA,YACpC,cAAc;AAAA,cACZ,oBAAoB;AAAA,gBAClB,mBAAmB,IAAI;AAAA,gBACvB,qBAAqB,IAAI;AAAA,gBACzB,eAAe,QAAQ,IAAI,aAAa;AAAA,gBACxC,iBAAiB,QAAQ,IAAI,aAAa;AAAA,cAC5C;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AACA,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF,SAAS,KAAK;AACZ,kBAAc,KAAK,2CAA2C,OAAO,GAAG,CAAC,EAAE;AAAA,EAC7E;AAEA,QAAM,YAAY,KAAK,IAAI;AAE3B,SAAO,IAAI,WAAW,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AAC1E,SAAO,IAAI,YAAY,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AA
Q3E,QAAM,kBAAkB,WAAW,UAAU,IAAI,IAAI,IAAI;AACzD,SAAO;AAAA,IAAI;AAAA,IAAyC,CAAC,MACnD,EAAE,KAAK,EAAE,UAAU,gBAAgB,CAAC;AAAA,EACtC;AACA,SAAO;AAAA,IAAI;AAAA,IAA6C,CAAC,MACvD,EAAE,KAAK,EAAE,UAAU,GAAG,eAAe,OAAO,CAAC;AAAA,EAC/C;AAEA,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,KAAK;AAAA,MACL,aAAa;AAAA,MACb,UAAU;AAAA,IACZ;AAAA,IACA,YAAY,OAAO,mBAAmB,QAAQ,SAAS,CAAC;AAAA,EAC1D;AAEA,MAAI,iBAAiB;AAErB,iBAAe,SAAS,QAAgB,UAAiC;AACvE,QAAI,eAAgB;AACpB,qBAAiB;AAEjB,UAAM,YAAY,WAAW,MAAM;AACjC,oBAAc,MAAM,qBAAqB,mBAAmB,OAAO,MAAM,GAAG;AAC5E,cAAQ,KAAK,CAAC;AAAA,IAChB,GAAG,mBAAmB;AAEtB,QAAI;AACF,oBAAc,KAAK,6BAA6B,MAAM,EAAE;AACxD,YAAM,OAAO,MAAM;AAEnB,iBAAW,aAAa,YAAY;AAClC,cAAM,UAAU;AAAA,MAClB;AAEA,mBAAa,SAAS;AACtB,cAAQ,KAAK,QAAQ;AAAA,IACvB,SAAS,OAAO;AACd,mBAAa,SAAS;AACtB,YAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,oBAAc,MAAM,gCAAgC,OAAO,EAAE;AAC7D,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF;AAEA,UAAQ,GAAG,WAAW,MAAM;AAC1B,SAAK,SAAS,WAAW,CAAC;AAAA,EAC5B,CAAC;AAED,UAAQ,GAAG,UAAU,MAAM;AACzB,SAAK,SAAS,UAAU,CAAC;AAAA,EAC3B,CAAC;AAED,UAAQ,GAAG,qBAAqB,CAAC,UAAU;AACzC,kBAAc,MAAM,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AACzE,SAAK,SAAS,qBAAqB,CAAC;AAAA,EACtC,CAAC;AAED,UAAQ,GAAG,sBAAsB,CAAC,WAAW;AAC3C,kBAAc,MAAM,wBAAwB,OAAO,MAAM,CAAC,EAAE;AAC5D,SAAK,SAAS,sBAAsB,CAAC;AAAA,EACvC,CAAC;AAED,QAAM,OAAO,OAAO,IAAI;AAExB,gBAAc,KAAK,GAAG,OAAO,IAAI,KAAK,OAAO,OAAO,wBAAwB,IAAI,IAAI,IAAI,MAAM;AAChG;AAEA,KAAK,KAAK,EAAE,MAAM,CAAC,UAAU;AAC3B,QAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,gBAAc,MAAM,2BAA2B,OAAO,EAAE;AACxD,UAAQ,KAAK,CAAC;AAChB,CAAC;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/mcp-use.json
CHANGED
package/dist/src/config/index.js
CHANGED
|
@@ -20,7 +20,6 @@ function safeParseInt(value, defaultVal, min, max) {
|
|
|
20
20
|
}
|
|
21
21
|
return parsed;
|
|
22
22
|
}
|
|
23
|
-
const VALID_REASONING_EFFORTS = ["low", "medium", "high"];
|
|
24
23
|
let cachedEnv = null;
|
|
25
24
|
function parseEnv() {
|
|
26
25
|
if (cachedEnv) return cachedEnv;
|
|
@@ -51,7 +50,7 @@ function getMissingEnvMessage(capability) {
|
|
|
51
50
|
reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\n\n\u{1F449} Create a Reddit app at: https://www.reddit.com/prefs/apps (select "script" type)',
|
|
52
51
|
search: '\u274C **Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` (including `scope: "reddit"`).\n\n\u{1F449} Get your free API key at: https://serper.dev (2,500 free queries)',
|
|
53
52
|
scraping: "\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\n\n\u{1F449} Sign up at: https://scrape.do (1,000 free credits)",
|
|
54
|
-
llmExtraction: "\u26A0\uFE0F **AI extraction disabled.** Set `LLM_API_KEY` to enable AI-powered content extraction and search classification.\n\nScraping will work but without intelligent content filtering."
|
|
53
|
+
llmExtraction: "\u26A0\uFE0F **AI extraction disabled.** Set `LLM_API_KEY`, `LLM_BASE_URL`, and `LLM_MODEL` to enable AI-powered content extraction and search classification.\n\nScraping will work but without intelligent content filtering."
|
|
55
54
|
};
|
|
56
55
|
return messages[capability];
|
|
57
56
|
}
|
|
@@ -59,12 +58,7 @@ const CONCURRENCY = {
|
|
|
59
58
|
SEARCH: safeParseInt(process.env.CONCURRENCY_SEARCH, 50, 1, 200),
|
|
60
59
|
SCRAPER: safeParseInt(process.env.CONCURRENCY_SCRAPER, 50, 1, 200),
|
|
61
60
|
REDDIT: safeParseInt(process.env.CONCURRENCY_REDDIT, 50, 1, 200),
|
|
62
|
-
LLM_EXTRACTION: safeParseInt(
|
|
63
|
-
process.env.LLM_CONCURRENCY || process.env.LLM_EXTRACTION_CONCURRENCY,
|
|
64
|
-
50,
|
|
65
|
-
1,
|
|
66
|
-
200
|
|
67
|
-
)
|
|
61
|
+
LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 50, 1, 200)
|
|
68
62
|
};
|
|
69
63
|
const SCRAPER = {
|
|
70
64
|
BATCH_SIZE: 30,
|
|
@@ -92,41 +86,28 @@ const CTR_WEIGHTS = {
|
|
|
92
86
|
9: 13.33,
|
|
93
87
|
10: 12.56
|
|
94
88
|
};
|
|
95
|
-
function parseLlmReasoningEffort(value) {
|
|
96
|
-
if (!value || value === "none") return "none";
|
|
97
|
-
if (VALID_REASONING_EFFORTS.includes(value)) {
|
|
98
|
-
return value;
|
|
99
|
-
}
|
|
100
|
-
return "none";
|
|
101
|
-
}
|
|
102
|
-
function envWithFallback(...names) {
|
|
103
|
-
for (const name of names) {
|
|
104
|
-
const val = process.env[name]?.trim();
|
|
105
|
-
if (val) return val;
|
|
106
|
-
}
|
|
107
|
-
return void 0;
|
|
108
|
-
}
|
|
109
89
|
let cachedLlmExtraction = null;
|
|
110
90
|
function getLlmExtraction() {
|
|
111
91
|
if (cachedLlmExtraction) return cachedLlmExtraction;
|
|
112
|
-
const apiKey =
|
|
113
|
-
const baseUrl =
|
|
114
|
-
const model =
|
|
92
|
+
const apiKey = process.env.LLM_API_KEY?.trim() || "";
|
|
93
|
+
const baseUrl = process.env.LLM_BASE_URL?.trim();
|
|
94
|
+
const model = process.env.LLM_MODEL?.trim();
|
|
95
|
+
const fallbackModel = process.env.LLM_FALLBACK_MODEL?.trim() || "";
|
|
115
96
|
if (apiKey && !baseUrl) {
|
|
116
97
|
throw new Error(
|
|
117
|
-
"LLM_BASE_URL is required when LLM_API_KEY is set. Set LLM_BASE_URL to your OpenAI-compatible endpoint
|
|
98
|
+
"LLM_BASE_URL is required when LLM_API_KEY is set. Set LLM_BASE_URL to your OpenAI-compatible endpoint."
|
|
118
99
|
);
|
|
119
100
|
}
|
|
120
101
|
if (apiKey && !model) {
|
|
121
102
|
throw new Error(
|
|
122
|
-
"LLM_MODEL is required when LLM_API_KEY is set.
|
|
103
|
+
"LLM_MODEL is required when LLM_API_KEY is set."
|
|
123
104
|
);
|
|
124
105
|
}
|
|
125
106
|
cachedLlmExtraction = {
|
|
126
107
|
API_KEY: apiKey,
|
|
127
108
|
BASE_URL: baseUrl || "",
|
|
128
109
|
MODEL: model || "",
|
|
129
|
-
|
|
110
|
+
FALLBACK_MODEL: fallbackModel
|
|
130
111
|
};
|
|
131
112
|
return cachedLlmExtraction;
|
|
132
113
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/config/index.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n\n const parsed = parseInt(value, 10);\n\n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n\n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n\n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n\n return parsed;\n}\n\n
|
|
5
|
-
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAS3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;
|
|
4
|
+
"sourcesContent": ["/**\n * Consolidated configuration\n * All environment variables, constants, and LLM config in one place\n */\n\nimport { Logger } from 'mcp-use';\n\nimport { VERSION, PACKAGE_NAME, PACKAGE_DESCRIPTION } from '../version.js';\n\n// ============================================================================\n// Safe Integer Parsing Helper\n// ============================================================================\n\n/**\n * Safely parse an integer from environment variable with bounds checking\n */\nfunction safeParseInt(\n value: string | undefined,\n defaultVal: number,\n min: number,\n max: number\n): number {\n const logger = Logger.get('config');\n\n if (!value) {\n return defaultVal;\n }\n\n const parsed = parseInt(value, 10);\n\n if (isNaN(parsed)) {\n logger.warn(`Invalid number \"${value}\", using default ${defaultVal}`);\n return defaultVal;\n }\n\n if (parsed < min) {\n logger.warn(`Value ${parsed} below minimum ${min}, clamping to ${min}`);\n return min;\n }\n\n if (parsed > max) {\n logger.warn(`Value ${parsed} above maximum ${max}, clamping to ${max}`);\n return max;\n }\n\n return parsed;\n}\n\n\n// ============================================================================\n// Environment Parsing\n// ============================================================================\n\ninterface EnvConfig {\n SCRAPER_API_KEY: string;\n SEARCH_API_KEY: string | undefined;\n REDDIT_CLIENT_ID: string | undefined;\n REDDIT_CLIENT_SECRET: string | undefined;\n}\n\nlet cachedEnv: EnvConfig | null = null;\n\nexport function parseEnv(): EnvConfig {\n if (cachedEnv) return cachedEnv;\n cachedEnv = {\n SCRAPER_API_KEY: process.env.SCRAPEDO_API_KEY || '',\n SEARCH_API_KEY: process.env.SERPER_API_KEY || undefined,\n REDDIT_CLIENT_ID: process.env.REDDIT_CLIENT_ID || undefined,\n REDDIT_CLIENT_SECRET: process.env.REDDIT_CLIENT_SECRET || undefined,\n };\n return cachedEnv;\n}\n\n// 
============================================================================\n// MCP Server Configuration\n// ============================================================================\n\nexport const SERVER = {\n NAME: PACKAGE_NAME,\n VERSION: VERSION,\n DESCRIPTION: PACKAGE_DESCRIPTION,\n} as const;\n\n// ============================================================================\n// Capability Detection (which features are available based on ENV)\n// ============================================================================\n\nexport interface Capabilities {\n reddit: boolean; // REDDIT_CLIENT_ID + REDDIT_CLIENT_SECRET\n search: boolean; // SERPER_API_KEY\n scraping: boolean; // SCRAPEDO_API_KEY\n llmExtraction: boolean; // LLM_API_KEY\n}\n\nexport function getCapabilities(): Capabilities {\n const env = parseEnv();\n return {\n reddit: !!(env.REDDIT_CLIENT_ID && env.REDDIT_CLIENT_SECRET),\n search: !!env.SEARCH_API_KEY,\n scraping: !!env.SCRAPER_API_KEY,\n llmExtraction: !!LLM_EXTRACTION.API_KEY,\n };\n}\n\nexport function getMissingEnvMessage(capability: keyof Capabilities): string {\n const messages: Record<keyof Capabilities, string> = {\n reddit: '\u274C **Reddit tools unavailable.** Set `REDDIT_CLIENT_ID` and `REDDIT_CLIENT_SECRET` to enable `get-reddit-post`.\\n\\n\uD83D\uDC49 Create a Reddit app at: https://www.reddit.com/prefs/apps (select \"script\" type)',\n search: '\u274C **Search unavailable.** Set `SERPER_API_KEY` to enable `web-search` (including `scope: \"reddit\"`).\\n\\n\uD83D\uDC49 Get your free API key at: https://serper.dev (2,500 free queries)',\n scraping: '\u274C **Web scraping unavailable.** Set `SCRAPEDO_API_KEY` to enable `scrape-links`.\\n\\n\uD83D\uDC49 Sign up at: https://scrape.do (1,000 free credits)',\n llmExtraction: '\u26A0\uFE0F **AI extraction disabled.** Set `LLM_API_KEY`, `LLM_BASE_URL`, and `LLM_MODEL` to enable AI-powered content extraction and search classification.\\n\\nScraping will work but without intelligent 
content filtering.',\n };\n return messages[capability];\n}\n\n// ============================================================================\n// Concurrency Limits\n// ============================================================================\n\nexport const CONCURRENCY = {\n SEARCH: safeParseInt(process.env.CONCURRENCY_SEARCH, 50, 1, 200),\n SCRAPER: safeParseInt(process.env.CONCURRENCY_SCRAPER, 50, 1, 200),\n REDDIT: safeParseInt(process.env.CONCURRENCY_REDDIT, 50, 1, 200),\n LLM_EXTRACTION: safeParseInt(process.env.LLM_CONCURRENCY, 50, 1, 200),\n} as const;\n\nexport const SCRAPER = {\n BATCH_SIZE: 30,\n EXTRACTION_PREFIX: 'Extract from document only \u2014 never hallucinate or add external knowledge.',\n EXTRACTION_SUFFIX: 'First line = content, not preamble. No confirmation messages.',\n} as const;\n\n// ============================================================================\n// Reddit Configuration\n// ============================================================================\n\nexport const REDDIT = {\n BATCH_SIZE: 10,\n MAX_WORDS_PER_POST: 50_000,\n MAX_WORDS_TOTAL: 500_000,\n MIN_POSTS: 1,\n MAX_POSTS: 50,\n RETRY_COUNT: 5,\n RETRY_DELAYS: [2000, 4000, 8000, 16000, 32000] as const,\n} as const;\n\n// ============================================================================\n// CTR Weights for URL Ranking (inspired from CTR research)\n// ============================================================================\n\nexport const CTR_WEIGHTS: Record<number, number> = {\n 1: 100.00,\n 2: 60.00,\n 3: 48.89,\n 4: 33.33,\n 5: 28.89,\n 6: 26.44,\n 7: 24.44,\n 8: 17.78,\n 9: 13.33,\n 10: 12.56,\n} as const;\n\n// ============================================================================\n// LLM Configuration\n//\n// Required vars (all must be set together when LLM is enabled):\n// LLM_API_KEY \u2014 API key for the OpenAI-compatible endpoint\n// LLM_BASE_URL \u2014 endpoint base URL (e.g. 
https://server.up.railway.app/v1)\n// LLM_MODEL \u2014 primary model (e.g. gpt-5.4-mini)\n//\n// Optional:\n// LLM_FALLBACK_MODEL \u2014 model to use after primary exhausts all retries (e.g. gpt-5.4)\n// LLM_CONCURRENCY \u2014 parallel LLM calls (default: 50)\n//\n// Reasoning effort is always 'low' \u2014 not configurable.\n// ============================================================================\n\ninterface LlmExtractionConfig {\n readonly MODEL: string;\n readonly FALLBACK_MODEL: string;\n readonly BASE_URL: string;\n readonly API_KEY: string;\n}\n\nlet cachedLlmExtraction: LlmExtractionConfig | null = null;\n\nfunction getLlmExtraction(): LlmExtractionConfig {\n if (cachedLlmExtraction) return cachedLlmExtraction;\n\n const apiKey = process.env.LLM_API_KEY?.trim() || '';\n const baseUrl = process.env.LLM_BASE_URL?.trim();\n const model = process.env.LLM_MODEL?.trim();\n const fallbackModel = process.env.LLM_FALLBACK_MODEL?.trim() || '';\n\n if (apiKey && !baseUrl) {\n throw new Error(\n 'LLM_BASE_URL is required when LLM_API_KEY is set. ' +\n 'Set LLM_BASE_URL to your OpenAI-compatible endpoint.',\n );\n }\n if (apiKey && !model) {\n throw new Error(\n 'LLM_MODEL is required when LLM_API_KEY is set.',\n );\n }\n\n cachedLlmExtraction = {\n API_KEY: apiKey,\n BASE_URL: baseUrl || '',\n MODEL: model || '',\n FALLBACK_MODEL: fallbackModel,\n };\n return cachedLlmExtraction;\n}\n\nexport const LLM_EXTRACTION: LlmExtractionConfig = new Proxy({} as LlmExtractionConfig, {\n get(_target, prop: string) {\n return getLlmExtraction()[prop as keyof LlmExtractionConfig];\n },\n});\n"],
|
|
5
|
+
"mappings": "AAKA,SAAS,cAAc;AAEvB,SAAS,SAAS,cAAc,2BAA2B;AAS3D,SAAS,aACP,OACA,YACA,KACA,KACQ;AACR,QAAM,SAAS,OAAO,IAAI,QAAQ;AAElC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,SAAS,OAAO,EAAE;AAEjC,MAAI,MAAM,MAAM,GAAG;AACjB,WAAO,KAAK,mBAAmB,KAAK,oBAAoB,UAAU,EAAE;AACpE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,KAAK;AAChB,WAAO,KAAK,SAAS,MAAM,kBAAkB,GAAG,iBAAiB,GAAG,EAAE;AACtE,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAcA,IAAI,YAA8B;AAE3B,SAAS,WAAsB;AACpC,MAAI,UAAW,QAAO;AACtB,cAAY;AAAA,IACV,iBAAiB,QAAQ,IAAI,oBAAoB;AAAA,IACjD,gBAAgB,QAAQ,IAAI,kBAAkB;AAAA,IAC9C,kBAAkB,QAAQ,IAAI,oBAAoB;AAAA,IAClD,sBAAsB,QAAQ,IAAI,wBAAwB;AAAA,EAC5D;AACA,SAAO;AACT;AAMO,MAAM,SAAS;AAAA,EACpB,MAAM;AAAA,EACN;AAAA,EACA,aAAa;AACf;AAaO,SAAS,kBAAgC;AAC9C,QAAM,MAAM,SAAS;AACrB,SAAO;AAAA,IACL,QAAQ,CAAC,EAAE,IAAI,oBAAoB,IAAI;AAAA,IACvC,QAAQ,CAAC,CAAC,IAAI;AAAA,IACd,UAAU,CAAC,CAAC,IAAI;AAAA,IAChB,eAAe,CAAC,CAAC,eAAe;AAAA,EAClC;AACF;AAEO,SAAS,qBAAqB,YAAwC;AAC3E,QAAM,WAA+C;AAAA,IACnD,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,UAAU;AAAA,IACV,eAAe;AAAA,EACjB;AACA,SAAO,SAAS,UAAU;AAC5B;AAMO,MAAM,cAAc;AAAA,EACzB,QAAQ,aAAa,QAAQ,IAAI,oBAAoB,IAAI,GAAG,GAAG;AAAA,EAC/D,SAAS,aAAa,QAAQ,IAAI,qBAAqB,IAAI,GAAG,GAAG;AAAA,EACjE,QAAQ,aAAa,QAAQ,IAAI,oBAAoB,IAAI,GAAG,GAAG;AAAA,EAC/D,gBAAgB,aAAa,QAAQ,IAAI,iBAAiB,IAAI,GAAG,GAAG;AACtE;AAEO,MAAM,UAAU;AAAA,EACrB,YAAY;AAAA,EACZ,mBAAmB;AAAA,EACnB,mBAAmB;AACrB;AAMO,MAAM,SAAS;AAAA,EACpB,YAAY;AAAA,EACZ,oBAAoB;AAAA,EACpB,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,WAAW;AAAA,EACX,aAAa;AAAA,EACb,cAAc,CAAC,KAAM,KAAM,KAAM,MAAO,IAAK;AAC/C;AAMO,MAAM,cAAsC;AAAA,EACjD,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,GAAG;AAAA,EACH,IAAI;AACN;AAwBA,IAAI,sBAAkD;AAEtD,SAAS,mBAAwC;AAC/C,MAAI,oBAAqB,QAAO;AAEhC,QAAM,SAAS,QAAQ,IAAI,aAAa,KAAK,KAAK;AAClD,QAAM,UAAU,QAAQ,IAAI,cAAc,KAAK;AAC/C,QAAM,QAAQ,QAAQ,IAAI,WAAW,KAAK;AAC1C,QAAM,gBAAgB,QAAQ,IAAI,oBAAoB,KAAK,KAAK;AAEhE,MAAI,UAAU,CAAC,
SAAS;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AACA,MAAI,UAAU,CAAC,OAAO;AACpB,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,wBAAsB;AAAA,IACpB,SAAS;AAAA,IACT,UAAU,WAAW;AAAA,IACrB,OAAO,SAAS;AAAA,IAChB,gBAAgB;AAAA,EAClB;AACA,SAAO;AACT;AAEO,MAAM,iBAAsC,IAAI,MAAM,CAAC,GAA0B;AAAA,EACtF,IAAI,SAAS,MAAc;AACzB,WAAO,iBAAiB,EAAE,IAAiC;AAAA,EAC7D;AACF,CAAC;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -7,11 +7,12 @@ import {
|
|
|
7
7
|
withStallProtection
|
|
8
8
|
} from "../utils/errors.js";
|
|
9
9
|
import { mcpLog } from "../utils/logger.js";
|
|
10
|
-
const MAX_LLM_INPUT_CHARS =
|
|
11
|
-
const
|
|
10
|
+
const MAX_LLM_INPUT_CHARS = 5e5;
|
|
11
|
+
const MAX_PRIMARY_MODEL_INPUT_CHARS = 1e5;
|
|
12
|
+
const LLM_CLIENT_TIMEOUT_MS = 6e5;
|
|
12
13
|
const BACKOFF_JITTER_FACTOR = 0.3;
|
|
13
|
-
const LLM_STALL_TIMEOUT_MS =
|
|
14
|
-
const LLM_REQUEST_DEADLINE_MS =
|
|
14
|
+
const LLM_STALL_TIMEOUT_MS = 75e3;
|
|
15
|
+
const LLM_REQUEST_DEADLINE_MS = 15e4;
|
|
15
16
|
const llmHealth = {
|
|
16
17
|
lastPlannerOk: false,
|
|
17
18
|
lastExtractorOk: false,
|
|
@@ -83,6 +84,7 @@ const LLM_RETRY_CONFIG = {
|
|
|
83
84
|
baseDelayMs: 1e3,
|
|
84
85
|
maxDelayMs: 5e3
|
|
85
86
|
};
|
|
87
|
+
const FALLBACK_RETRY_COUNT = 3;
|
|
86
88
|
const RETRYABLE_LLM_ERROR_CODES = /* @__PURE__ */ new Set([
|
|
87
89
|
"rate_limit_exceeded",
|
|
88
90
|
"server_error",
|
|
@@ -108,17 +110,14 @@ function createLLMProcessor() {
|
|
|
108
110
|
return llmClient;
|
|
109
111
|
}
|
|
110
112
|
function buildChatRequestBody(model, prompt) {
|
|
111
|
-
|
|
113
|
+
return {
|
|
112
114
|
model,
|
|
113
|
-
messages: [{ role: "user", content: prompt }]
|
|
115
|
+
messages: [{ role: "user", content: prompt }],
|
|
116
|
+
reasoning_effort: "low"
|
|
114
117
|
};
|
|
115
|
-
if (LLM_EXTRACTION.REASONING_EFFORT !== "none") {
|
|
116
|
-
requestBody.reasoning_effort = LLM_EXTRACTION.REASONING_EFFORT;
|
|
117
|
-
}
|
|
118
|
-
return requestBody;
|
|
119
118
|
}
|
|
120
|
-
async function requestText(processor, prompt, operationLabel, signal) {
|
|
121
|
-
const model = LLM_EXTRACTION.MODEL;
|
|
119
|
+
async function requestText(processor, prompt, operationLabel, signal, modelOverride) {
|
|
120
|
+
const model = modelOverride || LLM_EXTRACTION.MODEL;
|
|
122
121
|
try {
|
|
123
122
|
const response = await withStallProtection(
|
|
124
123
|
(stallSignal) => processor.chat.completions.create(
|
|
@@ -145,6 +144,29 @@ async function requestText(processor, prompt, operationLabel, signal) {
|
|
|
145
144
|
return { content: null, model, error: message };
|
|
146
145
|
}
|
|
147
146
|
}
|
|
147
|
+
async function requestTextWithFallback(processor, prompt, operationLabel, signal) {
|
|
148
|
+
const primary = await requestText(processor, prompt, operationLabel, signal);
|
|
149
|
+
if (primary.content) return primary;
|
|
150
|
+
const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;
|
|
151
|
+
if (!fallbackModel) return primary;
|
|
152
|
+
mcpLog("warning", `Primary model failed, switching to fallback ${fallbackModel}`, "llm");
|
|
153
|
+
let lastError = primary.error;
|
|
154
|
+
for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {
|
|
155
|
+
if (attempt > 0) {
|
|
156
|
+
const delayMs = calculateLLMBackoff(attempt - 1);
|
|
157
|
+
mcpLog("warning", `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, "llm");
|
|
158
|
+
try {
|
|
159
|
+
await sleep(delayMs, signal);
|
|
160
|
+
} catch {
|
|
161
|
+
break;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
const result = await requestText(processor, prompt, `${operationLabel} [fallback]`, signal, fallbackModel);
|
|
165
|
+
if (result.content) return result;
|
|
166
|
+
lastError = result.error;
|
|
167
|
+
}
|
|
168
|
+
return { content: null, model: fallbackModel, error: lastError };
|
|
169
|
+
}
|
|
148
170
|
function isRetryableLLMError(error) {
|
|
149
171
|
if (!error || typeof error !== "object") return false;
|
|
150
172
|
const stallCode = error?.code;
|
|
@@ -169,6 +191,21 @@ function isRetryableLLMError(error) {
|
|
|
169
191
|
}
|
|
170
192
|
return false;
|
|
171
193
|
}
|
|
194
|
+
function isContextWindowError(error) {
|
|
195
|
+
if (!error || typeof error !== "object") return false;
|
|
196
|
+
const record = error;
|
|
197
|
+
const nested = typeof record.error === "object" && record.error !== null ? record.error : null;
|
|
198
|
+
const code = typeof record.code === "string" ? record.code : void 0;
|
|
199
|
+
const nestedCode = nested && typeof nested.code === "string" ? nested.code : void 0;
|
|
200
|
+
if (code === "context_length_exceeded" || nestedCode === "context_length_exceeded") {
|
|
201
|
+
return true;
|
|
202
|
+
}
|
|
203
|
+
const messages = [];
|
|
204
|
+
if (typeof record.message === "string") messages.push(record.message);
|
|
205
|
+
if (nested && typeof nested.message === "string") messages.push(nested.message);
|
|
206
|
+
const combined = messages.join(" ").toLowerCase();
|
|
207
|
+
return combined.includes("context length") || combined.includes("context window") || combined.includes("maximum context") || combined.includes("maximum tokens") || combined.includes("token limit") || combined.includes("too many tokens") || combined.includes("prompt is too long") || combined.includes("reduce the length");
|
|
208
|
+
}
|
|
172
209
|
function calculateLLMBackoff(attempt) {
|
|
173
210
|
const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);
|
|
174
211
|
const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;
|
|
@@ -182,7 +219,7 @@ async function processContentWithLLM(content, config, processor, signal) {
|
|
|
182
219
|
return {
|
|
183
220
|
content,
|
|
184
221
|
processed: false,
|
|
185
|
-
error: "LLM processor not available (
|
|
222
|
+
error: "LLM processor not available (LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL must all be set)",
|
|
186
223
|
errorDetails: {
|
|
187
224
|
code: ErrorCode.AUTH_ERROR,
|
|
188
225
|
message: "LLM processor not available",
|
|
@@ -194,6 +231,7 @@ async function processContentWithLLM(content, config, processor, signal) {
|
|
|
194
231
|
return { content: content || "", processed: false, error: "Empty content provided" };
|
|
195
232
|
}
|
|
196
233
|
const truncatedContent = content.length > MAX_LLM_INPUT_CHARS ? content.substring(0, MAX_LLM_INPUT_CHARS) + "\n\n[Content truncated due to length]" : content;
|
|
234
|
+
const skipPrimaryForSize = truncatedContent.length > MAX_PRIMARY_MODEL_INPUT_CHARS && !!LLM_EXTRACTION.FALLBACK_MODEL;
|
|
197
235
|
const safeUrl = (() => {
|
|
198
236
|
if (!config.url) return void 0;
|
|
199
237
|
try {
|
|
@@ -267,52 +305,88 @@ ${truncatedContent}` : `Clean the following page content: drop navigation, ads,
|
|
|
267
305
|
${urlLine}Content:
|
|
268
306
|
${truncatedContent}`;
|
|
269
307
|
let lastError;
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
308
|
+
if (skipPrimaryForSize) {
|
|
309
|
+
mcpLog(
|
|
310
|
+
"info",
|
|
311
|
+
`Input ${truncatedContent.length} chars exceeds primary model cap (${MAX_PRIMARY_MODEL_INPUT_CHARS}); routing directly to fallback`,
|
|
312
|
+
"llm"
|
|
313
|
+
);
|
|
314
|
+
} else {
|
|
315
|
+
for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {
|
|
316
|
+
try {
|
|
317
|
+
if (attempt === 0) {
|
|
318
|
+
mcpLog("info", `Starting extraction with ${LLM_EXTRACTION.MODEL}`, "llm");
|
|
319
|
+
} else {
|
|
320
|
+
mcpLog("warning", `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, "llm");
|
|
321
|
+
}
|
|
322
|
+
const response = await requestText(processor, prompt, "LLM extraction", signal);
|
|
323
|
+
if (response.content) {
|
|
324
|
+
mcpLog("info", `Successfully extracted ${response.content.length} characters`, "llm");
|
|
325
|
+
markLLMSuccess("extractor");
|
|
326
|
+
return { content: response.content, processed: true };
|
|
327
|
+
}
|
|
328
|
+
mcpLog("warning", "Received empty response from LLM", "llm");
|
|
329
|
+
markLLMFailure("extractor", "LLM returned empty response");
|
|
330
|
+
return {
|
|
331
|
+
content,
|
|
332
|
+
processed: false,
|
|
333
|
+
error: "LLM returned empty response",
|
|
334
|
+
errorDetails: {
|
|
335
|
+
code: ErrorCode.INTERNAL_ERROR,
|
|
336
|
+
message: "LLM returned empty response",
|
|
337
|
+
retryable: false
|
|
338
|
+
}
|
|
339
|
+
};
|
|
340
|
+
} catch (err) {
|
|
341
|
+
lastError = classifyError(err);
|
|
342
|
+
const status = hasStatus(err) ? err.status : void 0;
|
|
343
|
+
const code = typeof err === "object" && err !== null && "code" in err ? String(err.code) : void 0;
|
|
344
|
+
const ctxErr = isContextWindowError(err);
|
|
345
|
+
mcpLog("error", `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}, context_window=${ctxErr}]`, "llm");
|
|
346
|
+
if (ctxErr) {
|
|
347
|
+
mcpLog("warning", "Context window exceeded on primary \u2014 skipping remaining retries, routing to fallback", "llm");
|
|
348
|
+
break;
|
|
298
349
|
}
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
350
|
+
if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {
|
|
351
|
+
const delayMs = calculateLLMBackoff(attempt);
|
|
352
|
+
mcpLog("warning", `Retrying in ${delayMs}ms...`, "llm");
|
|
353
|
+
try {
|
|
354
|
+
await sleep(delayMs, signal);
|
|
355
|
+
} catch {
|
|
356
|
+
break;
|
|
357
|
+
}
|
|
358
|
+
continue;
|
|
359
|
+
}
|
|
360
|
+
break;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;
|
|
365
|
+
if (fallbackModel) {
|
|
366
|
+
mcpLog("warning", `Primary exhausted, switching to fallback ${fallbackModel}`, "llm");
|
|
367
|
+
for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {
|
|
368
|
+
if (attempt > 0) {
|
|
369
|
+
const delayMs = calculateLLMBackoff(attempt - 1);
|
|
370
|
+
mcpLog("warning", `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, "llm");
|
|
308
371
|
try {
|
|
309
372
|
await sleep(delayMs, signal);
|
|
310
373
|
} catch {
|
|
311
374
|
break;
|
|
312
375
|
}
|
|
313
|
-
continue;
|
|
314
376
|
}
|
|
315
|
-
|
|
377
|
+
try {
|
|
378
|
+
const response = await requestText(processor, prompt, "LLM extraction [fallback]", signal, fallbackModel);
|
|
379
|
+
if (response.content) {
|
|
380
|
+
mcpLog("info", `Fallback extracted ${response.content.length} characters`, "llm");
|
|
381
|
+
markLLMSuccess("extractor");
|
|
382
|
+
return { content: response.content, processed: true };
|
|
383
|
+
}
|
|
384
|
+
mcpLog("warning", "Fallback returned empty response", "llm");
|
|
385
|
+
break;
|
|
386
|
+
} catch (err) {
|
|
387
|
+
lastError = classifyError(err);
|
|
388
|
+
mcpLog("error", `Fallback error (attempt ${attempt + 1}): ${lastError.message}`, "llm");
|
|
389
|
+
}
|
|
316
390
|
}
|
|
317
391
|
}
|
|
318
392
|
const errorMessage = lastError?.message || "Unknown LLM error";
|
|
@@ -320,7 +394,6 @@ ${truncatedContent}`;
|
|
|
320
394
|
markLLMFailure("extractor", errorMessage);
|
|
321
395
|
return {
|
|
322
396
|
content,
|
|
323
|
-
// Return original content as fallback
|
|
324
397
|
processed: false,
|
|
325
398
|
error: `LLM extraction failed: ${errorMessage}`,
|
|
326
399
|
errorDetails: lastError || {
|
|
@@ -415,7 +488,7 @@ SEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):
|
|
|
415
488
|
${lines.join("\n")}`;
|
|
416
489
|
try {
|
|
417
490
|
mcpLog("info", `Classifying ${urlsToClassify.length} URLs against objective`, "llm");
|
|
418
|
-
const response = await
|
|
491
|
+
const response = await requestTextWithFallback(
|
|
419
492
|
processor,
|
|
420
493
|
prompt,
|
|
421
494
|
"Search classification"
|
|
@@ -477,7 +550,7 @@ RULES:
|
|
|
477
550
|
- Do not include URLs.
|
|
478
551
|
- Keep rationales \u226412 words.`;
|
|
479
552
|
try {
|
|
480
|
-
const response = await
|
|
553
|
+
const response = await requestTextWithFallback(
|
|
481
554
|
processor,
|
|
482
555
|
prompt,
|
|
483
556
|
"Raw-mode refine query generation"
|
|
@@ -604,7 +677,7 @@ freshness_window:
|
|
|
604
677
|
- If the goal mentions a recent release / date / version, use "days" or "weeks".
|
|
605
678
|
- Stable protocols / APIs \u2192 "months" or "years".`;
|
|
606
679
|
try {
|
|
607
|
-
const response = await
|
|
680
|
+
const response = await requestTextWithFallback(
|
|
608
681
|
processor,
|
|
609
682
|
prompt,
|
|
610
683
|
"Research brief generation",
|
|
@@ -684,6 +757,7 @@ export {
|
|
|
684
757
|
processContentWithLLM,
|
|
685
758
|
renderResearchBrief,
|
|
686
759
|
requestText,
|
|
760
|
+
requestTextWithFallback,
|
|
687
761
|
suggestRefineQueriesForRawMode
|
|
688
762
|
};
|
|
689
763
|
//# sourceMappingURL=llm-processor.js.map
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/services/llm-processor.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses OpenRouter via OPENROUTER_API_KEY for AI-powered content filtering\n * Implements robust retry logic and NEVER throws\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~25k tokens) */\nconst MAX_LLM_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 120_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 15_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 30_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. 
*/\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. 
Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. */\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n// OpenRouter/OpenAI specific retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: 
LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n const requestBody: Record<string, unknown> = {\n model,\n messages: [{ role: 'user', content: prompt }],\n };\n\n if (LLM_EXTRACTION.REASONING_EFFORT !== 'none') {\n requestBody.reasoning_effort = LLM_EXTRACTION.REASONING_EFFORT;\n }\n\n return requestBody;\n}\n\nexport async function requestText(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if (error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from OpenAI/OpenRouter\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? 
record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_EXTRACTION_API_KEY or OPENROUTER_API_KEY not set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to avoid token limits\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. 
Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. 
If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) 
Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) \"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Content clearly failed to load: return ONLY a single line, choosing from:\n \\`## Matches\\\\n_Page did not load: 404_\\`\n \\`## Matches\\\\n_Page did not load: login-wall_\\`\n \\`## Matches\\\\n_Page did not load: paywall_\\`\n \\`## Matches\\\\n_Page did not load: JS-render-empty_\\`\n \\`## Matches\\\\n_Page did not load: non-text-asset_\\`\n \\`## Matches\\\\n_Page did not load: truncated-before-relevant-section_\\`\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. 
Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Retry loop\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(\n processor,\n prompt,\n 'LLM extraction',\n signal,\n );\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response - not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n\n // Log the error\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? 
String((err as Record<string, unknown>).code)\n : undefined;\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}]`, 'llm');\n\n // Check if we should retry\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n\n // Non-retryable or max retries reached\n break;\n }\n }\n\n // All attempts failed - return original content with error info\n const errorMessage = lastError?.message || 'Unknown LLM error';\n mcpLog('error', `All attempts failed: ${errorMessage}. Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content, // Return original content as fallback\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: 
string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? 
url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. 
Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. 
Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestText(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: unknown) {\n 
const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. 
Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestText(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly string[];\n}\n\nconst 
VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? 
parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in 
the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n- For \\`site:<domain>\\` filters, ONLY use domains you are highly confident are real. Safe choices: \\`github.com\\`, \\`stackoverflow.com\\`, \\`reddit.com\\`, \\`news.ycombinator.com\\`, \\`arxiv.org\\`, \\`nvd.nist.gov\\`, \\`pypi.org\\`, \\`npmjs.com\\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names \"Cursor\" \u2192 \\`cursor.com\\`/\\`docs.cursor.com\\` is acceptable). 
If you don't know the product's real docs domain, leave the query open (no \\`site:\\`) instead of guessing.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestText(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. 
\\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
|
|
5
|
-
"mappings": "AAMA,OAAO,YAAY;AACnB,SAAS,gBAAgB,uBAAuB;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,cAAc;AAGvB,MAAM,sBAAsB;AAG5B,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB;AAG9B,MAAM,uBAAuB;AAG7B,MAAM,0BAA0B;AAuBhC,MAAM,YAAY;AAAA,EAChB,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,sBAAsB;AAAA,EACtB,wBAAwB;AAAA,EACxB,kBAAkB;AAAA,EAClB,oBAAoB;AAAA,EACpB,4BAA4B;AAAA,EAC5B,8BAA8B;AAChC;AAEO,SAAS,eAAe,MAA2B;AACxD,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,6BAA6B;AAAA,EACzC,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,+BAA+B;AAAA,EAC3C;AACF;AAEO,SAAS,eAAe,MAAqB,KAAoB;AACtE,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,OAAO,eAAe;AAClF,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,8BAA8B;AAAA,EAC1C,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,gCAAgC;AAAA,EAC5C;AACF;AAEO,SAAS,eAAkC;AAChD,QAAM,MAAM,gBAAgB;AAC5B,SAAO;AAAA,IACL,eAAe,UAAU;AAAA,IACzB,iBAAiB,UAAU;AAAA,IAC3B,sBAAsB,UAAU;AAAA,IAChC,wBAAwB,UAAU;AAAA,IAClC,kBAAkB,UAAU;AAAA,IAC5B,oBAAoB,UAAU;AAAA;AAAA;AAAA,IAG9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,4BAA4B,UAAU;AAAA,IACtC,8BAA8B,UAAU;AAAA,EAC1C;AACF;AAGO,SAAS,0BAAgC;AAC9C,YAAU,gBAAgB;AAC1B,YAAU,kBAAkB;AAC5B,YAAU,uBAAuB;AACjC,YAAU,yBAAyB;AACnC,YAAU,mBAAmB;AAC7B,YAAU,qBAAqB;AAC/B,YAAU,6BAA6B;AACvC,YAAU,+BAA+B;AAC3C;AAgBA,MAAM,mBAAmB;AAAA,EACvB,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,YAAY;AACd;AAGA,MAAM,4BAA4B,oBAAI,IAAI;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGD,SAAS,UAAU,OAA6C;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,YAAY,SACZ,OAAQ,MAAkC,WAAW;AAEzD;AAEA,IAAI,YAA2B;AAIxB,SAAS,qBAAoC;AAClD,MAAI,CAAC,gBAAgB,EAAE,cAAe,QAAO;AAE7C,MAAI,CAAC,WAAW;AACd,gBAAY,IAAI,OAAO;AAAA,MACrB,SAAS,eAAe;AAAA,MACxB,QAAQ,eAAe;AAAA,MACvB,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,gBAAgB,EAAE,WAAW,yBAAyB;AAAA,IACxD,CAAC;AACD,WAAO,QAAQ,qCAAqC,eAAe,KAAK,cAAc,eAAe,QAAQ,KAAK,KAAK;AAAA,EACzH;AACA,SAAO;AACT;AAEA,SAAS,qBAAqB,OAAe,QAAyC;A
ACpF,QAAM,cAAuC;AAAA,IAC3C;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,EAC9C;AAEA,MAAI,eAAe,qBAAqB,QAAQ;AAC9C,gBAAY,mBAAmB,eAAe;AAAA,EAChD;AAEA,SAAO;AACT;AAEA,eAAsB,YACpB,WACA,QACA,gBACA,QACoE;AACpE,QAAM,QAAQ,eAAe;AAE7B,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB,CAAC,gBAAgB,UAAU,KAAK,YAAY;AAAA,QAC1C,qBAAqB,OAAO,MAAM;AAAA,QAClC;AAAA,UACE,QAAQ,SAAS,YAAY,IAAI,CAAC,aAAa,MAAM,CAAC,IAAI;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG,cAAc,KAAK,KAAK;AAAA,IAC7B;AAEA,UAAM,UAAU,SAAS,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK;AAC9D,QAAI,SAAS;AACX,aAAO,EAAE,SAAS,MAAM;AAAA,IAC1B;AAEA,UAAM,MAAM,6BAA6B,KAAK;AAC9C,WAAO,WAAW,GAAG,cAAc,qCAAqC,KAAK,IAAI,KAAK;AACtF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,IAAI;AAAA,EAC5C,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,GAAG,cAAc,qBAAqB,KAAK,KAAK,OAAO,IAAI,KAAK;AAClF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,QAAQ;AAAA,EAChD;AACF;AAKA,SAAS,oBAAoB,OAAyB;AACpD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAGhD,QAAM,YAAa,OAA6B;AAChD,MAAI,cAAc,cAAc,cAAc,aAAa;AACzD,WAAO;AAAA,EACT;AAGA,MAAI,UAAU,KAAK,GAAG;AACpB,QAAI,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,KAAK;AACxH,aAAO;AAAA,IACT;AAAA,EACF;AAGA,QAAM,SAAS;AACf,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AACN,QAAM,YACJ,SACC,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO,YAC1D,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,MAAI,aAAa,0BAA0B,IAAI,SAAS,GAAG;AACzD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,OAAO,OAAO,YAAY,WAAW,OAAO,QAAQ,YAAY,IAAI;AACpF,MACE,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,SAAS,KAC1B,QAAQ,SAAS,WAAW,KAC5B,QAAQ,SAAS,qBAAqB,KACtC,QAAQ,SAAS,cAAc,KAC/B,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,YAAY,GAC7B;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAKA,SAAS,oBAAoB,SAAyB;AACpD,QAAM,mBAAmB,iBAAiB,cAAc,KAAK,IAAI,GAAG,OAAO;AAC3E,QAAM,SAAS,KAAK,OAAO,IAAI,wBAAwB;AACvD,SAAO,KAAK,IAAI,mBAAmB,QAAQ,iBAAiB,UAAU;AACxE;AAOA,eAAsB,sBACpB,SACA,QACA,WACA,QACoB;AAEpB,MAAI,CAAC,OAAO,SAAS;AACnB,WAAO,EAAE,SAAS,WAAW,MAAM;AAAA,EACrC;AAEA,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL
;AAAA,MACA,WAAW;AAAA,MACX,OAAO;AAAA,MACP,cAAc;AAAA,QACZ,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO,EAAE,SAAS,WAAW,IAAI,WAAW,OAAO,OAAO,yBAAyB;AAAA,EACrF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,sBACtC,QAAQ,UAAU,GAAG,mBAAmB,IAAI,0CAC5C;AAKJ,QAAM,WAAW,MAAM;AACrB,QAAI,CAAC,OAAO,IAAK,QAAO;AACxB,QAAI;AACF,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,aAAO,GAAG,EAAE,MAAM,GAAG,EAAE,QAAQ;AAAA,IACjC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF,GAAG;AACH,QAAM,UAAU,UAAU,aAAa,OAAO;AAAA;AAAA,IAAS;AAEvD,QAAM,SAAS,OAAO,UAClB;AAAA;AAAA,EAEJ,OAAO,2BAA2B,OAAO,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAsDhD,gBAAgB,KACZ;AAAA;AAAA,EAEJ,OAAO;AAAA,EACP,gBAAgB;AAEhB,MAAI;AAGJ,WAAS,UAAU,GAAG,WAAW,iBAAiB,YAAY,WAAW;AACvE,QAAI;AACF,UAAI,YAAY,GAAG;AACjB,eAAO,QAAQ,4BAA4B,eAAe,KAAK,IAAI,KAAK;AAAA,MAC1E,OAAO;AACL,eAAO,WAAW,iBAAiB,OAAO,IAAI,iBAAiB,UAAU,IAAI,KAAK;AAAA,MACpF;AAEA,YAAM,WAAW,MAAM;AAAA,QACrB;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,UAAI,SAAS,SAAS;AACpB,eAAO,QAAQ,0BAA0B,SAAS,QAAQ,MAAM,eAAe,KAAK;AACpF,uBAAe,WAAW;AAC1B,eAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,MACtD;AAGA,aAAO,WAAW,oCAAoC,KAAK;AAC3D,qBAAe,aAAa,6BAA6B;AACzD,aAAO;AAAA,QACL;AAAA,QACA,WAAW;AAAA,QACX,OAAO;AAAA,QACP,cAAc;AAAA,UACZ,MAAM,UAAU;AAAA,UAChB,SAAS;AAAA,UACT,WAAW;AAAA,QACb;AAAA,MACF;AAAA,IAEF,SAAS,KAAc;AACrB,kBAAY,cAAc,GAAG;AAG7B,YAAM,SAAS,UAAU,GAAG,IAAI,IAAI,SAAS;AAC7C,YAAM,OAAO,OAAO,QAAQ,YAAY,QAAQ,QAAQ,UAAU,MAC9D,OAAQ,IAAgC,IAAI,IAC5C;AACJ,aAAO,SAAS,kBAAkB,UAAU,CAAC,MAAM,UAAU,OAAO,YAAY,MAAM,UAAU,IAAI,eAAe,oBAAoB,GAAG,CAAC,KAAK,KAAK;AAGrJ,UAAI,oBAAoB,GAAG,KAAK,UAAU,iBAAiB,YAAY;AACrE,cAAM,UAAU,oBAAoB,OAAO;AAC3C,eAAO,WAAW,eAAe,OAAO,SAAS,KAAK;AACtD,YAAI;AAAE,gBAAM,MAAM,SAAS,MAAM;AAAA,QAAG,QAAQ;AAAE;AAAA,QAAO;AACrD;AAAA,MACF;AAGA;AAAA,IACF;AAAA,EACF;AAGA,QAAM,
eAAe,WAAW,WAAW;AAC3C,SAAO,SAAS,wBAAwB,YAAY,iCAAiC,KAAK;AAC1F,iBAAe,aAAa,YAAY;AAExC,SAAO;AAAA,IACL;AAAA;AAAA,IACA,WAAW;AAAA,IACX,OAAO,0BAA0B,YAAY;AAAA,IAC7C,cAAc,aAAa;AAAA,MACzB,MAAM,UAAU;AAAA,MAChB,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAOA,MAAM,0BAA0B;AA2ChC,eAAsB,sBACpB,YAQA,WACA,cACA,WACA,kBAAqC,CAAC,GAC4B;AAClE,QAAM,iBAAiB,WAAW,MAAM,GAAG,uBAAuB;AAMlE,QAAM,iBAAiB,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AACxD,QAAM,gBAAgB,CAAC,SAAyB,eAAe,OAAO,CAAC,KAAK;AAG5E,QAAM,QAAkB,CAAC;AACzB,aAAW,OAAO,gBAAgB;AAChC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,UAAM,UAAU,IAAI,QAAQ,SAAS,MACjC,IAAI,QAAQ,MAAM,GAAG,GAAG,IAAI,QAC5B,IAAI;AACR,UAAM,KAAK,IAAI,IAAI,IAAI,OAAO,cAAc,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,WAAM,MAAM,WAAM,OAAO,EAAE;AAAA,EAC/F;AAEA,QAAM,mBAAmB,gBAAgB,SAAS,IAC9C,gBAAgB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC9C;AACJ,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA,aAEJ,SAAS;AAAA,SACb,KAAK;AAAA;AAAA;AAAA,EAGZ,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iBAmDD,eAAe,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAMpB,eAAe,MAAM,cAAc,YAAY;AAAA,EAC/D,MAAM,KAAK,IAAI,CAAC;AAEhB,MAAI;AACF,WAAO,QAAQ,eAAe,eAAe,MAAM,2BAA2B,KAAK;AAEnF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAGA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAKjC,QAAI,CAAC,OAAO,SAAS,OAAO,OAAO,cAAc,YAAY,CAAC,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC3F,YAAM,SAAS;AACf,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAEA,WAAO,QAAQ,4BAA4B,OAAO,QAAQ,OAAO,OAAK,EAAE,SAAS,iBAAiB,EAAE,MAAM,oBAAoB,KAAK;AACnI,mBA
Ae,SAAS;AACxB,WAAO,EAAE,QAAQ,OAAO;AAAA,EAC1B,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,0BAA0B,OAAO,IAAI,KAAK;AAC1D,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,MAAM,OAAO,0BAA0B,OAAO,GAAG;AAAA,EACpE;AACF;AAEA,eAAsB,+BACpB,YAKA,WACA,iBACA,WAC8D;AAC9D,QAAM,kBAAkB,WAAW,MAAM,GAAG,EAAE;AAC9C,QAAM,QAAQ,gBAAgB,IAAI,CAAC,QAAQ;AACzC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,WAAO,IAAI,IAAI,IAAI,KAAK,IAAI,KAAK,WAAM,MAAM;AAAA,EAC/C,CAAC;AAED,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aASJ,SAAS;AAAA;AAAA;AAAA,EAGpB,gBAAgB,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA,EAGvD,MAAM,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAShB,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,OAAO;AAAA,IACrC;AAEA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,MAAM,QAAQ,OAAO,cAAc,IAAI,OAAO,iBAAiB,CAAC,EAAE;AAAA,EACrF,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,4CAA4C,OAAO,IAAI,KAAK;AAC5E,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,QAAQ;AAAA,EACtC;AACF;AA0BA,MAAM,qBAAqB,oBAAI,IAAI;AAAA,EACjC;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAa;AAAA,EAAa;AAAA,EAAW;AAAA,EACpD;AAAA,EAAa;AAAA,EAAkB;AACjC,CAAC;AAED,MAAM,kBAAkB,oBAAI,IAAI,CAAC,QAAQ,SAAS,UAAU,OAAO,CAAC;AACpE,MAAM,iBAAiB,oBAAI,IAAmB,CAAC,UAAU,OAAO,MAAM,CAAC;AACvE,MAAM,mBAAmB,oBAAI,IAAI,CAAC,cAAc,cAAc,CAAC;AAE/D,SAAS,cAAc,OAAmC;AACxD,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM,OAAO,MAAM,QAAQ;AACzE;AAEA,SAAS,YAAY,OAA8C;AACjE,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM;AAChD,QAAI,OAAO,MAAM,YAAY,MAAM,KAAM,QAAO;AAChD,UAAM,OAAQ,EAA8B;AAC5C,UAAM,SAAU,EAA8B;AAC9C,WAAO,OAAO,SAAS,YAClB,iBAAiB,IAAI,IAAI,KACzB,OAAO,WAAW,YAClB,OAAO,KAAK,EAAE,SAAS;AAAA,EAC9B,CAAC;AACH;AAEO,SAAS,mBAAmB,KAAmC;AACpE,M
AAI;AACF,UAAM,UAAU,IAAI,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACxF,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,UAAM,aAAa,OAAO,OAAO,eAAe,WAAW,OAAO,aAAa;AAC/E,QAAI,CAAC,cAAc,CAAC,mBAAmB,IAAI,UAAU,EAAG,QAAO;AAE/D,UAAM,mBAAmB,OAAO,OAAO,qBAAqB,WAAW,OAAO,mBAAmB;AACjG,QAAI,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,gBAAgB,EAAG,QAAO;AAExE,UAAM,iBAAiB,OAAO;AAC9B,QAAI,OAAO,mBAAmB,YAAY,CAAC,eAAe,IAAI,cAA+B,EAAG,QAAO;AAEvG,QAAI,CAAC,YAAY,OAAO,mBAAmB,KAAK,OAAO,oBAAoB,WAAW,EAAG,QAAO;AAChG,QAAI,CAAC,cAAc,OAAO,aAAa,KAAK,OAAO,cAAc,WAAW,EAAG,QAAO;AAEtF,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,OAAO,OAAO,sBAAsB,WAAW,OAAO,oBAAoB;AAAA,MAC7F;AAAA,MACA,uBAAuB,OAAO,OAAO,0BAA0B,WAAW,OAAO,wBAAwB;AAAA,MACzG;AAAA,MACA,qBAAqB,OAAO;AAAA,MAC5B,eAAe,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAAA,MACrE,iBAAiB,cAAc,OAAO,eAAe,IAAI,OAAO,kBAAkB,CAAC;AAAA,MACnF,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,MAC7E,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,IAC/E;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,sBACpB,MACA,WACA,QAC+B;AAC/B,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAOT,IAAI;AAAA,SACH,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA2CZ,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO,WAAW,kDAAkD,SAAS,SAAS,SAAS,IAAI,KAAK;AACxG,qBAAe,WAAW,SAAS,SAAS,gBAAgB;AAC5D,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,mBAAmB,SAAS,OAAO;AACjD,QAAI,CAAC,OAAO;AACV,aAAO,WAAW,wDAAwD,KAAK;AAC/E,qBAAe,WAAW,+BAA+B;AACzD,aAAO;AAAA,IACT;AAEA,mBAAe,SAAS;AACxB,WAAO;AAAA,EACT,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,qCAAqC,OAAO,IAAI,KAAK;AACvE,mBAAe,WAAW,OAAO;AACjC,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBAAoB,OAA8B;AAChE,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,MAAM,UAAU,aAAQ,MAAM,iBAAiB,EAAE;AACjF,QA
AM,KAAK,yBAAyB,MAAM,cAAc,aAAQ,MAAM,qBAAqB,EAAE;AAC7F,QAAM,KAAK,oBAAoB,MAAM,gBAAgB,IAAI;AACzD,QAAM,KAAK,EAAE;AAEb,MAAI,MAAM,oBAAoB,SAAS,GAAG;AACxC,UAAM,KAAK,yBAAyB;AACpC,UAAM,oBAAoB,QAAQ,CAAC,MAAM,MAAM;AAC7C,YAAM,KAAK,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,aAAQ,KAAK,MAAM,EAAE;AAAA,IAC1D,CAAC;AACD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,sBAAsB,MAAM,cAAc,MAAM,mFAA8E;AACzI,eAAW,QAAQ,MAAM,eAAe;AACtC,YAAM,KAAK,KAAK,IAAI,EAAE;AAAA,IACxB;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,gBAAgB,SAAS,GAAG;AACpC,UAAM,KAAK,sFAAuF;AAClG,eAAW,QAAQ,MAAM,gBAAiB,OAAM,KAAK,KAAK,IAAI,EAAE;AAChE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,OAAO,MAAM,cAAe,OAAM,KAAK,KAAK,GAAG,EAAE;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,KAAK,MAAM,cAAe,OAAM,KAAK,KAAK,CAAC,EAAE;AACxD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iLAAiL;AAE5L,SAAO,MAAM,KAAK,IAAI;AACxB;",
|
|
4
|
+
"sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses any OpenAI-compatible endpoint. Reasoning effort is always 'low'.\n * Primary model exhausts its retries first; fallback model (LLM_FALLBACK_MODEL) then\n * gets up to FALLBACK_RETRY_COUNT additional attempts before the call fails.\n * NEVER throws \u2014 always returns a valid result.\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~125k tokens, sized for the larger fallback model) */\nconst MAX_LLM_INPUT_CHARS = 500_000 as const;\n\n/**\n * Maximum input characters for the primary model when it has a smaller context window.\n * Used when an input would exceed the mini model's limits so the call goes straight to fallback\n * instead of burning retries on guaranteed context_length_exceeded errors.\n */\nconst MAX_PRIMARY_MODEL_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 600_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 75_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 150_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly 
lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. */\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 
'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. 
*/\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n/** Number of additional attempts using the fallback model after primary exhausts. */\nconst FALLBACK_RETRY_COUNT = 3 as const;\n\n// OpenAI-compatible retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return 
llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n return {\n model,\n messages: [{ role: 'user', content: prompt }],\n reasoning_effort: 'low',\n };\n}\n\nexport async function requestText(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n modelOverride?: string,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = modelOverride || LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Single LLM call with automatic fallback model.\n * Tries the primary model once; if it fails and LLM_FALLBACK_MODEL is set,\n * retries up to FALLBACK_RETRY_COUNT times on the fallback model.\n * Used for single-shot calls (classify, brief, refine queries).\n */\nexport async function requestTextWithFallback(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const primary = await requestText(processor, prompt, operationLabel, signal);\n if (primary.content) return primary;\n\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (!fallbackModel) return primary;\n\n mcpLog('warning', `Primary model failed, switching to fallback ${fallbackModel}`, 'llm');\n\n let lastError = primary.error;\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n const result = await requestText(processor, prompt, `${operationLabel} [fallback]`, signal, fallbackModel);\n if (result.content) return result;\n lastError = result.error;\n }\n\n return { content: null, model: fallbackModel, error: lastError };\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if 
(error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from the OpenAI-compatible endpoint\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Detect \"the prompt is too long for this model\" errors.\n * These are NOT retryable on the same model \u2014 we should skip remaining primary retries\n * and go straight to the fallback model (which has a larger context window).\n */\nfunction isContextWindowError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n const record = error as Record<string, unknown>;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nestedCode = nested && typeof nested.code === 'string' ? 
nested.code : undefined;\n if (code === 'context_length_exceeded' || nestedCode === 'context_length_exceeded') {\n return true;\n }\n\n const messages: string[] = [];\n if (typeof record.message === 'string') messages.push(record.message);\n if (nested && typeof nested.message === 'string') messages.push(nested.message);\n const combined = messages.join(' ').toLowerCase();\n return (\n combined.includes('context length') ||\n combined.includes('context window') ||\n combined.includes('maximum context') ||\n combined.includes('maximum tokens') ||\n combined.includes('token limit') ||\n combined.includes('too many tokens') ||\n combined.includes('prompt is too long') ||\n combined.includes('reduce the length')\n );\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL must all be set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to 
avoid blowing past even the fallback model's context.\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // If the prompt would exceed the primary (mini) model's smaller context window,\n // skip it entirely and go straight to the fallback model. Saves burning retries\n // on guaranteed context_length_exceeded errors.\n const skipPrimaryForSize =\n truncatedContent.length > MAX_PRIMARY_MODEL_INPUT_CHARS && !!LLM_EXTRACTION.FALLBACK_MODEL;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. 
Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. 
Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) \"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Content clearly failed to load: return ONLY a single line, choosing from:\n \\`## Matches\\\\n_Page did not load: 404_\\`\n \\`## Matches\\\\n_Page did not load: login-wall_\\`\n \\`## Matches\\\\n_Page did not load: paywall_\\`\n \\`## Matches\\\\n_Page did not load: JS-render-empty_\\`\n \\`## Matches\\\\n_Page did not load: non-text-asset_\\`\n \\`## Matches\\\\n_Page did not load: truncated-before-relevant-section_\\`\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. 
Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Phase 1: primary model with up to LLM_RETRY_CONFIG.maxRetries retries.\n // Skip entirely when the input is too big for the primary's context window.\n if (skipPrimaryForSize) {\n mcpLog(\n 'info',\n `Input ${truncatedContent.length} chars exceeds primary model cap (${MAX_PRIMARY_MODEL_INPUT_CHARS}); routing directly to fallback`,\n 'llm',\n );\n } else {\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(processor, prompt, 'LLM extraction', signal);\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response \u2014 not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? 
String((err as Record<string, unknown>).code)\n : undefined;\n const ctxErr = isContextWindowError(err);\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}, context_window=${ctxErr}]`, 'llm');\n\n // Context window errors are not retryable on the same model \u2014 jump to fallback.\n if (ctxErr) {\n mcpLog('warning', 'Context window exceeded on primary \u2014 skipping remaining retries, routing to fallback', 'llm');\n break;\n }\n\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n break;\n }\n }\n }\n\n // Phase 2: fallback model \u2014 FALLBACK_RETRY_COUNT attempts before giving up\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (fallbackModel) {\n mcpLog('warning', `Primary exhausted, switching to fallback ${fallbackModel}`, 'llm');\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n try {\n const response = await requestText(processor, prompt, 'LLM extraction [fallback]', signal, fallbackModel);\n if (response.content) {\n mcpLog('info', `Fallback extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n mcpLog('warning', 'Fallback returned empty response', 'llm');\n break;\n } catch (err: unknown) {\n lastError = classifyError(err);\n mcpLog('error', `Fallback error (attempt ${attempt + 1}): ${lastError.message}`, 'llm');\n }\n }\n }\n\n const errorMessage = lastError?.message || 'Unknown LLM error';\n 
mcpLog('error', `All attempts failed: ${errorMessage}. Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content,\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly 
snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. 
Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. 
Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. 
Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: 
unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. 
Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly 
string[];\n}\n\nconst VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? 
parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. 
An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. 
product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n- For \\`site:<domain>\\` filters, ONLY use domains you are highly confident are real. Safe choices: \\`github.com\\`, \\`stackoverflow.com\\`, \\`reddit.com\\`, \\`news.ycombinator.com\\`, \\`arxiv.org\\`, \\`nvd.nist.gov\\`, \\`pypi.org\\`, \\`npmjs.com\\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names \"Cursor\" \u2192 \\`cursor.com\\`/\\`docs.cursor.com\\` is acceptable). If you don't know the product's real docs domain, leave the query open (no \\`site:\\`) instead of guessing.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 
'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. 
\\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
|
|
5
|
+
"mappings": "AAQA,OAAO,YAAY;AACnB,SAAS,gBAAgB,uBAAuB;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,cAAc;AAGvB,MAAM,sBAAsB;AAO5B,MAAM,gCAAgC;AAGtC,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB;AAG9B,MAAM,uBAAuB;AAG7B,MAAM,0BAA0B;AAuBhC,MAAM,YAAY;AAAA,EAChB,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,sBAAsB;AAAA,EACtB,wBAAwB;AAAA,EACxB,kBAAkB;AAAA,EAClB,oBAAoB;AAAA,EACpB,4BAA4B;AAAA,EAC5B,8BAA8B;AAChC;AAEO,SAAS,eAAe,MAA2B;AACxD,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,6BAA6B;AAAA,EACzC,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,+BAA+B;AAAA,EAC3C;AACF;AAEO,SAAS,eAAe,MAAqB,KAAoB;AACtE,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,OAAO,eAAe;AAClF,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,8BAA8B;AAAA,EAC1C,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,gCAAgC;AAAA,EAC5C;AACF;AAEO,SAAS,eAAkC;AAChD,QAAM,MAAM,gBAAgB;AAC5B,SAAO;AAAA,IACL,eAAe,UAAU;AAAA,IACzB,iBAAiB,UAAU;AAAA,IAC3B,sBAAsB,UAAU;AAAA,IAChC,wBAAwB,UAAU;AAAA,IAClC,kBAAkB,UAAU;AAAA,IAC5B,oBAAoB,UAAU;AAAA;AAAA;AAAA,IAG9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,4BAA4B,UAAU;AAAA,IACtC,8BAA8B,UAAU;AAAA,EAC1C;AACF;AAGO,SAAS,0BAAgC;AAC9C,YAAU,gBAAgB;AAC1B,YAAU,kBAAkB;AAC5B,YAAU,uBAAuB;AACjC,YAAU,yBAAyB;AACnC,YAAU,mBAAmB;AAC7B,YAAU,qBAAqB;AAC/B,YAAU,6BAA6B;AACvC,YAAU,+BAA+B;AAC3C;AAgBA,MAAM,mBAAmB;AAAA,EACvB,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,YAAY;AACd;AAGA,MAAM,uBAAuB;AAG7B,MAAM,4BAA4B,oBAAI,IAAI;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGD,SAAS,UAAU,OAA6C;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,YAAY,SACZ,OAAQ,MAAkC,WAAW;AAEzD;AAEA,IAAI,YAA2B;AAIxB,SAAS,qBAAoC;AAClD,MAAI,CAAC,gBAAgB,EAAE,cAAe,QAAO;AAE7C,MAAI,CAAC,WAAW;AACd,gBAAY,IAAI,OAAO;AAAA,MACrB,SAAS,eAAe;AAAA,MACxB,QAAQ,eAAe;AAAA,MACvB,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,gBAAgB,EAAE,WAAW,yBAAyB;AAAA,IACxD,CAAC;AACD,WAAO,QAAQ,qCAAqC,eAAe,KAAK,cAAc,eAAe,QAAQ,KAAK,KAAK;AAAA,EACzH;AACA,SAA
O;AACT;AAEA,SAAS,qBAAqB,OAAe,QAAyC;AACpF,SAAO;AAAA,IACL;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,IAC5C,kBAAkB;AAAA,EACpB;AACF;AAEA,eAAsB,YACpB,WACA,QACA,gBACA,QACA,eACoE;AACpE,QAAM,QAAQ,iBAAiB,eAAe;AAE9C,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB,CAAC,gBAAgB,UAAU,KAAK,YAAY;AAAA,QAC1C,qBAAqB,OAAO,MAAM;AAAA,QAClC;AAAA,UACE,QAAQ,SAAS,YAAY,IAAI,CAAC,aAAa,MAAM,CAAC,IAAI;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG,cAAc,KAAK,KAAK;AAAA,IAC7B;AAEA,UAAM,UAAU,SAAS,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK;AAC9D,QAAI,SAAS;AACX,aAAO,EAAE,SAAS,MAAM;AAAA,IAC1B;AAEA,UAAM,MAAM,6BAA6B,KAAK;AAC9C,WAAO,WAAW,GAAG,cAAc,qCAAqC,KAAK,IAAI,KAAK;AACtF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,IAAI;AAAA,EAC5C,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,GAAG,cAAc,qBAAqB,KAAK,KAAK,OAAO,IAAI,KAAK;AAClF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,QAAQ;AAAA,EAChD;AACF;AAQA,eAAsB,wBACpB,WACA,QACA,gBACA,QACoE;AACpE,QAAM,UAAU,MAAM,YAAY,WAAW,QAAQ,gBAAgB,MAAM;AAC3E,MAAI,QAAQ,QAAS,QAAO;AAE5B,QAAM,gBAAgB,eAAe;AACrC,MAAI,CAAC,cAAe,QAAO;AAE3B,SAAO,WAAW,+CAA+C,aAAa,IAAI,KAAK;AAEvF,MAAI,YAAY,QAAQ;AACxB,WAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,QAAI,UAAU,GAAG;AACf,YAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,aAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,UAAI;AAAE,cAAM,MAAM,SAAS,MAAM;AAAA,MAAG,QAAQ;AAAE;AAAA,MAAO;AAAA,IACvD;AACA,UAAM,SAAS,MAAM,YAAY,WAAW,QAAQ,GAAG,cAAc,eAAe,QAAQ,aAAa;AACzG,QAAI,OAAO,QAAS,QAAO;AAC3B,gBAAY,OAAO;AAAA,EACrB;AAEA,SAAO,EAAE,SAAS,MAAM,OAAO,eAAe,OAAO,UAAU;AACjE;AAKA,SAAS,oBAAoB,OAAyB;AACpD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAGhD,QAAM,YAAa,OAA6B;AAChD,MAAI,cAAc,cAAc,cAAc,aAAa;AACzD,WAAO;AAAA,EACT;AAGA,MAAI,UAAU,KAAK,GAAG;AACpB,QAAI,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,KAAK;AACxH,aAAO;AAAA,IACT;AAAA,EACF;AAGA,QAAM,SAAS;AACf,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AACN,QAAM,YACJ,SACC,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO,YAC1D,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,MAAI
,aAAa,0BAA0B,IAAI,SAAS,GAAG;AACzD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,OAAO,OAAO,YAAY,WAAW,OAAO,QAAQ,YAAY,IAAI;AACpF,MACE,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,SAAS,KAC1B,QAAQ,SAAS,WAAW,KAC5B,QAAQ,SAAS,qBAAqB,KACtC,QAAQ,SAAS,cAAc,KAC/B,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,YAAY,GAC7B;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAOA,SAAS,qBAAqB,OAAyB;AACrD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAEhD,QAAM,SAAS;AACf,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AAEN,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,aAAa,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7E,MAAI,SAAS,6BAA6B,eAAe,2BAA2B;AAClF,WAAO;AAAA,EACT;AAEA,QAAM,WAAqB,CAAC;AAC5B,MAAI,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AACpE,MAAI,UAAU,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AAC9E,QAAM,WAAW,SAAS,KAAK,GAAG,EAAE,YAAY;AAChD,SACE,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,aAAa,KAC/B,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,oBAAoB,KACtC,SAAS,SAAS,mBAAmB;AAEzC;AAKA,SAAS,oBAAoB,SAAyB;AACpD,QAAM,mBAAmB,iBAAiB,cAAc,KAAK,IAAI,GAAG,OAAO;AAC3E,QAAM,SAAS,KAAK,OAAO,IAAI,wBAAwB;AACvD,SAAO,KAAK,IAAI,mBAAmB,QAAQ,iBAAiB,UAAU;AACxE;AAOA,eAAsB,sBACpB,SACA,QACA,WACA,QACoB;AAEpB,MAAI,CAAC,OAAO,SAAS;AACnB,WAAO,EAAE,SAAS,WAAW,MAAM;AAAA,EACrC;AAEA,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL;AAAA,MACA,WAAW;AAAA,MACX,OAAO;AAAA,MACP,cAAc;AAAA,QACZ,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO,EAAE,SAAS,WAAW,IAAI,WAAW,OAAO,OAAO,yBAAyB;AAAA,EACrF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,sBACtC,QAAQ,UAAU,GAAG,mBAAmB,IAAI,0CAC5C;AAKJ,QAAM,qBACJ,iBAAiB,SAAS,iCAAiC,CAAC,CAAC,eAAe;AAK9E,QAAM,WAAW,MAAM;AACrB,QAAI,CAAC,OAAO,IAAK,QAAO;AACxB,QAAI;AACF,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,aAAO,GAAG,EAAE,MAAM,GAAG,EAAE,QAAQ;AAAA,IACjC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF,GAAG;AACH,QAAM,UAAU,UAAU,aAAa,OAAO;AAAA;AAAA,IAAS;AAEvD,QAAM,SAAS,OAAO,UAClB;AAAA;AAAA,EAEJ,OAAO,2BAA2B,OAAO,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;
AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAsDhD,gBAAgB,KACZ;AAAA;AAAA,EAEJ,OAAO;AAAA,EACP,gBAAgB;AAEhB,MAAI;AAIJ,MAAI,oBAAoB;AACtB;AAAA,MACE;AAAA,MACA,SAAS,iBAAiB,MAAM,qCAAqC,6BAA6B;AAAA,MAClG;AAAA,IACF;AAAA,EACF,OAAO;AACL,aAAS,UAAU,GAAG,WAAW,iBAAiB,YAAY,WAAW;AACvE,UAAI;AACF,YAAI,YAAY,GAAG;AACjB,iBAAO,QAAQ,4BAA4B,eAAe,KAAK,IAAI,KAAK;AAAA,QAC1E,OAAO;AACL,iBAAO,WAAW,iBAAiB,OAAO,IAAI,iBAAiB,UAAU,IAAI,KAAK;AAAA,QACpF;AAEA,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,kBAAkB,MAAM;AAE9E,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,0BAA0B,SAAS,QAAQ,MAAM,eAAe,KAAK;AACpF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,QACtD;AAGA,eAAO,WAAW,oCAAoC,KAAK;AAC3D,uBAAe,aAAa,6BAA6B;AACzD,eAAO;AAAA,UACL;AAAA,UACA,WAAW;AAAA,UACX,OAAO;AAAA,UACP,cAAc;AAAA,YACZ,MAAM,UAAU;AAAA,YAChB,SAAS;AAAA,YACT,WAAW;AAAA,UACb;AAAA,QACF;AAAA,MAEF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,cAAM,SAAS,UAAU,GAAG,IAAI,IAAI,SAAS;AAC7C,cAAM,OAAO,OAAO,QAAQ,YAAY,QAAQ,QAAQ,UAAU,MAC9D,OAAQ,IAAgC,IAAI,IAC5C;AACJ,cAAM,SAAS,qBAAqB,GAAG;AACvC,eAAO,SAAS,kBAAkB,UAAU,CAAC,MAAM,UAAU,OAAO,YAAY,MAAM,UAAU,IAAI,eAAe,oBAAoB,GAAG,CAAC,oBAAoB,MAAM,KAAK,KAAK;AAG/K,YAAI,QAAQ;AACV,iBAAO,WAAW,6FAAwF,KAAK;AAC/G;AAAA,QACF;AAEA,YAAI,oBAAoB,GAAG,KAAK,UAAU,iBAAiB,YAAY;AACrE,gBAAM,UAAU,oBAAoB,OAAO;AAC3C,iBAAO,WAAW,eAAe,OAAO,SAAS,KAAK;AACtD,cAAI;AAAE,kBAAM,MAAM,SAAS,MAAM;AAAA,UAAG,QAAQ;AAAE;AAAA,UAAO;AACrD;AAAA,QACF;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,gBAAgB,eAAe;AACrC,MAAI,eAAe;AACjB,WAAO,WAAW,4CAA4C,aAAa,IAAI,KAAK;AACpF,aAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,UAAI,UAAU,GAAG;AACf,cAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,eAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,YAAI;AAAE,gBAAM,MAAM,SAAS,MAAM;AAAA,QAAG,QAAQ;AAAE;AAAA,QAAO;AAAA,MACvD;AACA,UAAI;AACF,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,6BAA6B,QAAQ,aAAa;AACxG,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,sBAAsB,SAAS,QAAQ,MAAM,eAAe,KAAK;AAChF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW
,KAAK;AAAA,QACtD;AACA,eAAO,WAAW,oCAAoC,KAAK;AAC3D;AAAA,MACF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,eAAO,SAAS,2BAA2B,UAAU,CAAC,MAAM,UAAU,OAAO,IAAI,KAAK;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,eAAe,WAAW,WAAW;AAC3C,SAAO,SAAS,wBAAwB,YAAY,iCAAiC,KAAK;AAC1F,iBAAe,aAAa,YAAY;AAExC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,IACX,OAAO,0BAA0B,YAAY;AAAA,IAC7C,cAAc,aAAa;AAAA,MACzB,MAAM,UAAU;AAAA,MAChB,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAOA,MAAM,0BAA0B;AA2ChC,eAAsB,sBACpB,YAQA,WACA,cACA,WACA,kBAAqC,CAAC,GAC4B;AAClE,QAAM,iBAAiB,WAAW,MAAM,GAAG,uBAAuB;AAMlE,QAAM,iBAAiB,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AACxD,QAAM,gBAAgB,CAAC,SAAyB,eAAe,OAAO,CAAC,KAAK;AAG5E,QAAM,QAAkB,CAAC;AACzB,aAAW,OAAO,gBAAgB;AAChC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,UAAM,UAAU,IAAI,QAAQ,SAAS,MACjC,IAAI,QAAQ,MAAM,GAAG,GAAG,IAAI,QAC5B,IAAI;AACR,UAAM,KAAK,IAAI,IAAI,IAAI,OAAO,cAAc,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,WAAM,MAAM,WAAM,OAAO,EAAE;AAAA,EAC/F;AAEA,QAAM,mBAAmB,gBAAgB,SAAS,IAC9C,gBAAgB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC9C;AACJ,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA,aAEJ,SAAS;AAAA,SACb,KAAK;AAAA;AAAA;AAAA,EAGZ,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iBAmDD,eAAe,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAMpB,eAAe,MAAM,cAAc,YAAY;AAAA,EAC/D,MAAM,KAAK,IAAI,CAAC;AAEhB,MAAI;AACF,WAAO,QAAQ,eAAe,eAAe,MAAM,2BAA2B,KAAK;AAEnF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAGA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAKjC,QAAI,CAAC,OAAO,SAAS,OAAO,OAAO,cAAc,YAAY,CAAC,MAAM,QAAQ,OAA
O,OAAO,GAAG;AAC3F,YAAM,SAAS;AACf,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAEA,WAAO,QAAQ,4BAA4B,OAAO,QAAQ,OAAO,OAAK,EAAE,SAAS,iBAAiB,EAAE,MAAM,oBAAoB,KAAK;AACnI,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,OAAO;AAAA,EAC1B,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,0BAA0B,OAAO,IAAI,KAAK;AAC1D,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,MAAM,OAAO,0BAA0B,OAAO,GAAG;AAAA,EACpE;AACF;AAEA,eAAsB,+BACpB,YAKA,WACA,iBACA,WAC8D;AAC9D,QAAM,kBAAkB,WAAW,MAAM,GAAG,EAAE;AAC9C,QAAM,QAAQ,gBAAgB,IAAI,CAAC,QAAQ;AACzC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,WAAO,IAAI,IAAI,IAAI,KAAK,IAAI,KAAK,WAAM,MAAM;AAAA,EAC/C,CAAC;AAED,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aASJ,SAAS;AAAA;AAAA;AAAA,EAGpB,gBAAgB,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA,EAGvD,MAAM,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAShB,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,OAAO;AAAA,IACrC;AAEA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,MAAM,QAAQ,OAAO,cAAc,IAAI,OAAO,iBAAiB,CAAC,EAAE;AAAA,EACrF,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,4CAA4C,OAAO,IAAI,KAAK;AAC5E,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,QAAQ;AAAA,EACtC;AACF;AA0BA,MAAM,qBAAqB,oBAAI,IAAI;AAAA,EACjC;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAa;AAAA,EAAa;AAAA,EAAW;AAAA,EACpD;AAAA,EAAa;AAAA,EAAkB;AACjC,CAAC;AAED,MAAM,kBAAkB,oBAAI,IAAI,CAAC,QAAQ,SAAS,UAAU,OAAO,CAAC;AACpE,MAAM,iBAAiB,oBAAI,IAAmB,CAAC,UAAU,OAAO,MAAM,CAAC;AACvE,MAAM,mBAAmB,oBAAI,IAAI,CAAC,cAAc,cAAc,CAAC;AAE/D,SAAS,cAAc,OAAmC;AACxD,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM,OAAO,MAAM,QAAQ;AACzE;AAEA,SAAS,YAAY,OAA8C;AACjE,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM;AAChD,QAAI,OAAO,MAAM,YAAY,MAAM,KAAM,
QAAO;AAChD,UAAM,OAAQ,EAA8B;AAC5C,UAAM,SAAU,EAA8B;AAC9C,WAAO,OAAO,SAAS,YAClB,iBAAiB,IAAI,IAAI,KACzB,OAAO,WAAW,YAClB,OAAO,KAAK,EAAE,SAAS;AAAA,EAC9B,CAAC;AACH;AAEO,SAAS,mBAAmB,KAAmC;AACpE,MAAI;AACF,UAAM,UAAU,IAAI,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACxF,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,UAAM,aAAa,OAAO,OAAO,eAAe,WAAW,OAAO,aAAa;AAC/E,QAAI,CAAC,cAAc,CAAC,mBAAmB,IAAI,UAAU,EAAG,QAAO;AAE/D,UAAM,mBAAmB,OAAO,OAAO,qBAAqB,WAAW,OAAO,mBAAmB;AACjG,QAAI,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,gBAAgB,EAAG,QAAO;AAExE,UAAM,iBAAiB,OAAO;AAC9B,QAAI,OAAO,mBAAmB,YAAY,CAAC,eAAe,IAAI,cAA+B,EAAG,QAAO;AAEvG,QAAI,CAAC,YAAY,OAAO,mBAAmB,KAAK,OAAO,oBAAoB,WAAW,EAAG,QAAO;AAChG,QAAI,CAAC,cAAc,OAAO,aAAa,KAAK,OAAO,cAAc,WAAW,EAAG,QAAO;AAEtF,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,OAAO,OAAO,sBAAsB,WAAW,OAAO,oBAAoB;AAAA,MAC7F;AAAA,MACA,uBAAuB,OAAO,OAAO,0BAA0B,WAAW,OAAO,wBAAwB;AAAA,MACzG;AAAA,MACA,qBAAqB,OAAO;AAAA,MAC5B,eAAe,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAAA,MACrE,iBAAiB,cAAc,OAAO,eAAe,IAAI,OAAO,kBAAkB,CAAC;AAAA,MACnF,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,MAC7E,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,IAC/E;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,sBACpB,MACA,WACA,QAC+B;AAC/B,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAOT,IAAI;AAAA,SACH,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA2CZ,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO,WAAW,kDAAkD,SAAS,SAAS,SAAS,IAAI,KAAK;AACxG,qBAAe,WAAW,SAAS,SAAS,gBAAgB;AAC5D,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,mBAAmB,SAAS,OAAO;AACjD,QAAI,CAAC,OAAO;AACV,aAAO,WAAW,wDAAwD,KAAK;AAC/E,qBAAe,WAAW,+BAA+B;AACzD,aAAO;AAAA,IACT;AAEA,mBAAe,SAAS;AACxB,WAAO;AAAA,EACT,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,qCAAqC,OAAO,IAAI,KAAK;AACvE,mBAAe,W
AAW,OAAO;AACjC,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBAAoB,OAA8B;AAChE,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,MAAM,UAAU,aAAQ,MAAM,iBAAiB,EAAE;AACjF,QAAM,KAAK,yBAAyB,MAAM,cAAc,aAAQ,MAAM,qBAAqB,EAAE;AAC7F,QAAM,KAAK,oBAAoB,MAAM,gBAAgB,IAAI;AACzD,QAAM,KAAK,EAAE;AAEb,MAAI,MAAM,oBAAoB,SAAS,GAAG;AACxC,UAAM,KAAK,yBAAyB;AACpC,UAAM,oBAAoB,QAAQ,CAAC,MAAM,MAAM;AAC7C,YAAM,KAAK,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,aAAQ,KAAK,MAAM,EAAE;AAAA,IAC1D,CAAC;AACD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,sBAAsB,MAAM,cAAc,MAAM,mFAA8E;AACzI,eAAW,QAAQ,MAAM,eAAe;AACtC,YAAM,KAAK,KAAK,IAAI,EAAE;AAAA,IACxB;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,gBAAgB,SAAS,GAAG;AACpC,UAAM,KAAK,sFAAuF;AAClG,eAAW,QAAQ,MAAM,gBAAiB,OAAM,KAAK,KAAK,IAAI,EAAE;AAChE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,OAAO,MAAM,cAAe,OAAM,KAAK,KAAK,GAAG,EAAE;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,KAAK,MAAM,cAAe,OAAM,KAAK,KAAK,CAAC,EAAE;AACxD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iLAAiL;AAE5L,SAAO,MAAM,KAAK,IAAI;AACxB;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/src/tools/search.js
CHANGED
|
@@ -334,7 +334,7 @@ async function handleWebSearch(params, reporter = NOOP_REPORTER) {
|
|
|
334
334
|
let llmError;
|
|
335
335
|
if (useRaw || !llmProcessor) {
|
|
336
336
|
if (!useRaw && !llmProcessor) {
|
|
337
|
-
llmError = "LLM unavailable (
|
|
337
|
+
llmError = "LLM unavailable (LLM_API_KEY / LLM_BASE_URL / LLM_MODEL not set). Falling back to raw output.";
|
|
338
338
|
mcpLog("warning", llmError, "search");
|
|
339
339
|
await reporter.log("warning", "llm_classifier_unreachable: planner not configured; raw ranked list returned");
|
|
340
340
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/tools/search.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * Web Search Tool Handler\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n webSearchOutputSchema,\n webSearchParamsSchema,\n type WebSearchParams,\n type WebSearchOutput,\n} from '../schemas/web-search.js';\nimport { SearchClient } from '../clients/search.js';\nimport {\n aggregateAndRank,\n generateUnifiedOutput,\n} from '../utils/url-aggregator.js';\nimport {\n createLLMProcessor,\n classifySearchResults,\n suggestRefineQueriesForRawMode,\n type ClassificationEntry,\n type ClassificationResult,\n type RefineQuerySuggestion,\n} from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { classifySourceByUrl } from '../utils/source-type.js';\nimport {\n mcpLog,\n formatError,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\nimport { sanitizeSuggestion } from '../utils/sanitize.js';\n\n// --- Internal types ---\n\ninterface SearchAggregation {\n readonly rankedUrls: ReturnType<typeof aggregateAndRank>['rankedUrls'];\n readonly totalUniqueUrls: number;\n readonly frequencyThreshold: number;\n readonly thresholdNote?: string;\n}\n\ninterface SearchResponse {\n searches: Parameters<typeof aggregateAndRank>[0];\n totalQueries: number;\n}\n\n// --- Helpers ---\n\n/** Reddit post permalink: /r/{sub}/comments/{id}/ \u2014 drops subreddit\n * homepages, /rising, /new, /top, etc. so only post URLs reach the agent.\n * See mcp-revisions/tool-surface/02-extend-web-search-with-reddit-scope.md. 
*/\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+\\//i;\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\n\nfunction decorateQueriesForScope(queries: string[], scope: 'web' | 'reddit' | 'both'): string[] {\n if (scope === 'web') return queries;\n const reddited = queries.map((q) =>\n /\\bsite:reddit\\.com\\b/i.test(q) ? q : `${q} site:reddit.com`,\n );\n return scope === 'reddit' ? reddited : [...queries, ...reddited];\n}\n\nasync function executeSearches(queries: string[]): Promise<SearchResponse> {\n const client = new SearchClient();\n return client.searchMultiple(queries);\n}\n\nfunction filterScopedSearches(\n response: SearchResponse,\n scope: 'web' | 'reddit' | 'both',\n): SearchResponse {\n if (scope === 'web') return response;\n const filtered = response.searches.map((search) => ({\n ...search,\n results: search.results.filter((r) => {\n let host: string;\n try { host = new URL(r.link).hostname; } catch { return true; }\n // Non-reddit URLs pass through; reddit URLs must be post permalinks.\n if (!REDDIT_HOST.test(host)) return scope !== 'reddit';\n return REDDIT_POST_PERMALINK.test(r.link);\n }),\n }));\n return { ...response, searches: filtered };\n}\n\nfunction processResults(response: SearchResponse): {\n aggregation: SearchAggregation;\n} {\n const aggregation = aggregateAndRank(response.searches, 5);\n return { aggregation };\n}\n\n// --- Raw output (traditional unified ranked list) ---\n\nfunction buildRawOutput(\n queries: string[],\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n verbose: boolean = false,\n): string {\n return generateUnifiedOutput(\n aggregation.rankedUrls, queries, searches,\n aggregation.totalUniqueUrls,\n aggregation.frequencyThreshold, aggregation.thresholdNote,\n verbose,\n );\n}\n\nfunction buildSignalsSection(\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n totalQueries: number,\n): string {\n const coverageCount = searches.filter((search) => 
search.results.length >= 3).length;\n const lowYield = searches\n .filter((search) => search.results.length <= 1)\n .map((search) => `\"${search.query}\"`);\n const consensusCount = aggregation.rankedUrls.filter((url) => url.isConsensus).length;\n\n const lines = [\n '**Signals**',\n `- Coverage: ${coverageCount}/${totalQueries} queries returned \u22653 results`,\n `- Consensus URLs: ${consensusCount}`,\n ];\n\n if (lowYield.length > 0) {\n lines.push(`- Low-yield: ${lowYield.join(', ')}`);\n }\n\n return lines.join('\\n');\n}\n\nexport function buildSuggestedFollowUpsSection(\n refineQueries: Array<{ query: string; rationale?: string; gap_id?: number; gap_description?: string }> | undefined,\n): string {\n if (!refineQueries || refineQueries.length === 0) {\n return '';\n }\n\n const lines = ['## Suggested follow-up searches', ''];\n\n for (const item of refineQueries) {\n const query = sanitizeSuggestion(item.query ?? '');\n if (!query) continue;\n const rationale = sanitizeSuggestion(item.rationale ?? '');\n const gapTag = typeof item.gap_id === 'number'\n ? ` _(closes gap [${item.gap_id}])_`\n : item.gap_description\n ? ` _(${sanitizeSuggestion(item.gap_description)})_`\n : '';\n lines.push(rationale\n ? `- ${query} \u2014 ${rationale}${gapTag}`\n : `- ${query}${gapTag}`,\n );\n }\n\n return lines.length === 2 ? '' : lines.join('\\n');\n}\n\nexport function appendSignalsAndFollowUps(\n markdown: string,\n signalsSection: string,\n refineQueries: RefineQuerySuggestion[] | undefined,\n options: { includeSignals?: boolean } = {},\n): string {\n const includeSignals = options.includeSignals ?? 
false;\n const sections = [markdown];\n if (includeSignals && signalsSection) {\n sections.push('', '---', signalsSection);\n }\n const followUps = buildSuggestedFollowUpsSection(refineQueries);\n if (followUps) {\n sections.push('', followUps);\n }\n return sections.join('\\n');\n}\n\n// --- \"Start here\" section ---\n//\n// Surfaces the best 3-5 URLs at the top of the classified response so an agent\n// skimming the first screen sees them before tier tables. Deterministic: uses\n// existing `tier` + `rank` + `reason` from the classifier, no extra LLM call.\n//\n// Algorithm: take HIGHLY_RELEVANT by rank up to MAX_START_HERE; if fewer than\n// MIN_START_HERE, pad from top MAYBE_RELEVANT; skip entirely if no entries\n// above OTHER.\n\nconst MIN_START_HERE = 3;\nconst MAX_START_HERE = 5;\n\n/** Minimal structural shape \u2014 avoids coupling to private `RankedUrl` type. */\ninterface StartHereCandidate {\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n}\n\ninterface StartHereTiers {\n readonly high: readonly StartHereCandidate[];\n readonly maybe: readonly StartHereCandidate[];\n}\n\nexport function buildStartHereSection(\n tiers: StartHereTiers,\n entryByRank: Map<number, ClassificationEntry>,\n opts: { min?: number; max?: number } = {},\n): string {\n const min = opts.min ?? MIN_START_HERE;\n const max = opts.max ?? 
MAX_START_HERE;\n\n const picks: Array<{ candidate: StartHereCandidate; tier: 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' }> = [];\n\n for (const candidate of tiers.high) {\n if (picks.length >= max) break;\n picks.push({ candidate, tier: 'HIGHLY_RELEVANT' });\n }\n\n if (picks.length < min) {\n const target = Math.min(min, max);\n for (const candidate of tiers.maybe) {\n if (picks.length >= target) break;\n picks.push({ candidate, tier: 'MAYBE_RELEVANT' });\n }\n }\n\n if (picks.length === 0) return '';\n\n const lines: string[] = [];\n lines.push('## Start here \u2014 best candidates for your extract');\n picks.forEach((pick, i) => {\n const entry = entryByRank.get(pick.candidate.rank);\n const reason = entry?.reason && entry.reason.trim().length > 0 ? entry.reason : '\u2014';\n let domain: string;\n try {\n domain = new URL(pick.candidate.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = pick.candidate.url;\n }\n lines.push(\n `${i + 1}. **[${pick.candidate.title}](${pick.candidate.url})** \u2014 ${domain} \u2014 ${reason} *(${pick.tier}, rank ${pick.candidate.rank})*`,\n );\n });\n return lines.join('\\n');\n}\n\n// --- Classified output (3-tier LLM-classified table) ---\n\nfunction buildClassifiedOutput(\n classification: ClassificationResult,\n aggregation: SearchAggregation,\n extract: string,\n searches: SearchResponse['searches'],\n totalQueries: number,\n verbose: boolean = false,\n): string {\n const rankedUrls = aggregation.rankedUrls;\n\n // Build tier \u2192 entries mapping (keep url data alongside classifier metadata)\n const entryByRank = new Map(classification.results.map((r) => [r.rank, r]));\n\n const tiers = {\n high: [] as typeof rankedUrls,\n maybe: [] as typeof rankedUrls,\n other: [] as typeof rankedUrls,\n };\n\n for (const url of rankedUrls) {\n const entry = entryByRank.get(url.rank);\n const tier = entry?.tier;\n if (tier === 'HIGHLY_RELEVANT') {\n tiers.high.push(url);\n } else if (tier === 'MAYBE_RELEVANT') {\n 
tiers.maybe.push(url);\n } else {\n tiers.other.push(url);\n }\n }\n\n const lines: string[] = [];\n\n // Header with generated title, synthesis, and confidence\n lines.push(`## ${classification.title}`);\n lines.push(`> Looking for: ${extract}`);\n lines.push(`> ${totalQueries} queries \u2192 ${rankedUrls.length} URLs \u2192 ${tiers.high.length} highly relevant, ${tiers.maybe.length} possibly relevant`);\n if (classification.confidence) {\n const confReason = classification.confidence_reason ? ` \u2014 ${classification.confidence_reason}` : '';\n lines.push(`> Confidence: \\`${classification.confidence}\\`${confReason}`);\n }\n lines.push('');\n\n // \"Start here\" block: surface the top 3-5 URLs above the synthesis so an\n // agent skimming the first screen sees scrape candidates before prose.\n const startHere = buildStartHereSection(\n { high: tiers.high, maybe: tiers.maybe },\n entryByRank,\n );\n if (startHere) {\n lines.push(startHere);\n lines.push('');\n }\n\n lines.push(`**Summary:** ${classification.synthesis}`);\n lines.push('');\n\n // Helper: render one row with optional source_type + reason\n const renderRichRow = (url: typeof rankedUrls[number]): string => {\n const entry = entryByRank.get(url.rank);\n const coveragePct = Math.round(url.coverageRatio * 100);\n const seenIn = `${url.frequency}/${totalQueries} (${coveragePct}%)`;\n const sourceType = entry?.source_type ? `\\`${entry.source_type}\\`` : '\u2014';\n const reason = entry?.reason ? 
entry.reason.replace(/\\|/g, '\\\\|') : '\u2014';\n return `| ${url.rank} | [${url.title}](${url.url}) | ${sourceType} | ${seenIn} | ${reason} |`;\n };\n\n // Highly Relevant tier\n if (tiers.high.length > 0) {\n lines.push(`### Highly Relevant (${tiers.high.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.high) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Maybe Relevant tier\n if (tiers.maybe.length > 0) {\n lines.push(`### Maybe Relevant (${tiers.maybe.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.maybe) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Other tier \u2014 with query attribution\n if (tiers.other.length > 0) {\n lines.push(`### Other Results (${tiers.other.length})`);\n lines.push('| # | URL | Source | Score | Queries |');\n lines.push('|---|-----|--------|-------|---------|');\n for (const url of tiers.other) {\n const entry = entryByRank.get(url.rank);\n const queryList = url.queries.map((q) => `\"${q}\"`).join(', ');\n const sourceType = entry?.source_type ? 
`\\`${entry.source_type}\\`` : '\u2014';\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n lines.push(`| ${url.rank} | ${domain} | ${sourceType} | ${url.score.toFixed(1)} | ${queryList} |`);\n }\n lines.push('');\n }\n\n // Signals block is gated behind verbose \u2014 it duplicates info already\n // present in the per-row metadata for callers who care.\n // See: docs/code-review/context/05-output-formatting-patterns.md.\n if (verbose) {\n lines.push(buildSignalsSection(aggregation, searches, totalQueries));\n }\n\n // Gaps section \u2014 what the current results don't answer\n if (classification.gaps && classification.gaps.length > 0) {\n lines.push('');\n lines.push('## Gaps');\n for (const gap of classification.gaps) {\n lines.push(`- **[${gap.id}]** ${gap.description}`);\n }\n }\n\n const followUps = buildSuggestedFollowUpsSection(classification.refine_queries);\n if (followUps) {\n lines.push('');\n lines.push(followUps);\n }\n\n return lines.join('\\n');\n}\n\n// --- Metadata builder ---\n\nfunction buildMetadata(\n aggregation: SearchAggregation,\n executionTime: number,\n totalQueries: number,\n searches: SearchResponse['searches'],\n llmClassified: boolean,\n scope: 'web' | 'reddit' | 'both',\n llmError?: string,\n) {\n const coverageSummary = searches.map(s => {\n let topDomain: string | undefined;\n const topResult = s.results[0];\n if (topResult) {\n try { topDomain = new URL(topResult.link).hostname.replace(/^www\\./, ''); } catch { /* ignore */ }\n }\n return { query: s.query, result_count: s.results.length, top_url: topDomain };\n });\n const lowYieldQueries = searches\n .filter(s => s.results.length <= 1)\n .map(s => s.query);\n\n return {\n total_items: totalQueries,\n successful: aggregation.rankedUrls.length,\n failed: totalQueries - searches.filter(s => s.results.length > 0).length,\n execution_time_ms: executionTime,\n llm_classified: llmClassified,\n scope,\n 
...(llmError ? { llm_error: llmError } : {}),\n coverage_summary: coverageSummary,\n ...(lowYieldQueries.length > 0 ? { low_yield_queries: lowYieldQueries } : {}),\n };\n}\n\nfunction buildStructuredResults(\n aggregation: SearchAggregation,\n llmTagsByRank?: Map<number, string>,\n): Array<{\n rank: number;\n url: string;\n title: string;\n snippet: string;\n source_type: 'reddit' | 'github' | 'docs' | 'blog' | 'paper' | 'qa' | 'cve' | 'news' | 'video' | 'web';\n score: number;\n seen_in: number;\n best_position: number;\n}> {\n return aggregation.rankedUrls.map((row) => {\n // LLM tag wins when present; heuristic is the always-on fallback. See:\n // mcp-revisions/output-shaping/06-source-type-tagging-without-llm.md.\n const llmTag = llmTagsByRank?.get(row.rank);\n const heuristic = classifySourceByUrl(row.url);\n return {\n rank: row.rank,\n url: row.url,\n title: row.title,\n snippet: row.snippet,\n source_type: ((llmTag as typeof heuristic) ?? heuristic),\n score: Number(row.score.toFixed(2)),\n seen_in: row.frequency,\n best_position: row.bestPosition,\n };\n });\n}\n\n// --- Error builder ---\n\nfunction buildWebSearchError(\n error: unknown,\n params: WebSearchParams,\n startTime: number,\n): ToolExecutionResult<WebSearchOutput> {\n const structuredError = classifyError(error);\n const executionTime = Date.now() - startTime;\n\n mcpLog('error', `web-search: ${structuredError.message}`, 'search');\n\n const errorContent = formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'web-search',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n alternatives: [\n 'web-search(queries=[\"topic recommendations\"], extract=\"...\", scope: \"reddit\") \u2014 Reddit-only post permalinks via the same backend',\n 'scrape-links(urls=[...], extract=\"...\") \u2014 if you have URLs from prior steps, scrape them now',\n ],\n });\n\n return toolFailure(\n `${errorContent}\\n\\nExecution time: 
${formatDuration(executionTime)}\\nQueries: ${params.queries.length}`,\n );\n}\n\n// --- Main handler ---\n\nexport async function handleWebSearch(\n params: WebSearchParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<WebSearchOutput>> {\n const startTime = Date.now();\n\n try {\n const effectiveQueries = decorateQueriesForScope(params.queries, params.scope);\n if (params.scope !== 'web') {\n mcpLog('info', `Searching scope=${params.scope}: ${params.queries.length} input queries \u2192 ${effectiveQueries.length} dispatched`, 'search');\n } else {\n mcpLog('info', `Searching for ${params.queries.length} query/queries`, 'search');\n }\n await reporter.log('info', `Searching for ${effectiveQueries.length} query/queries (scope=${params.scope})`);\n await reporter.progress(15, 100, 'Submitting search queries');\n\n const rawResponse = await executeSearches(effectiveQueries);\n const response = filterScopedSearches(rawResponse, params.scope);\n await reporter.progress(50, 100, 'Collected search results');\n\n const { aggregation } = processResults(response);\n await reporter.log(\n 'info',\n `Collected ${aggregation.totalUniqueUrls} unique URLs across ${response.totalQueries} queries`,\n );\n\n // Decide: raw output or LLM classification\n const useRaw = params.raw;\n const llmProcessor = createLLMProcessor();\n\n let markdown: string;\n let llmClassified = false;\n let llmError: string | undefined;\n\n if (useRaw || !llmProcessor) {\n // Raw path: traditional unified ranked list\n if (!useRaw && !llmProcessor) {\n llmError = 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). 
Falling back to raw output.';\n mcpLog('warning', llmError, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', 'llm_classifier_unreachable: planner not configured; raw ranked list returned');\n }\n let rawRefineQueries: RefineQuerySuggestion[] | undefined;\n if (useRaw && llmProcessor) {\n const refineResult = await suggestRefineQueriesForRawMode(\n aggregation.rankedUrls,\n params.extract,\n params.queries,\n llmProcessor,\n );\n rawRefineQueries = refineResult.result;\n }\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n rawRefineQueries,\n { includeSignals: params.verbose },\n );\n await reporter.progress(80, 100, 'Ranking search results');\n } else {\n // LLM classification path\n await reporter.progress(65, 100, 'Classifying results by relevance');\n const classification = await classifySearchResults(\n aggregation.rankedUrls,\n params.extract,\n response.totalQueries,\n llmProcessor,\n params.queries,\n );\n\n if (classification.result) {\n markdown = buildClassifiedOutput(\n classification.result, aggregation, params.extract, response.searches, response.totalQueries, params.verbose,\n );\n llmClassified = true;\n await reporter.progress(85, 100, 'Formatted classified results');\n } else {\n // Classification failed \u2014 fall back to raw\n llmError = classification.error ?? 
'Unknown classification error';\n mcpLog('warning', `Classification failed, falling back to raw: ${llmError}`, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', `llm_classifier_unreachable: ${llmError}`);\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n undefined,\n { includeSignals: params.verbose },\n );\n await reporter.progress(85, 100, 'Classification failed, using raw output');\n }\n }\n\n const executionTime = Date.now() - startTime;\n const metadata = buildMetadata(\n aggregation, executionTime, response.totalQueries, response.searches, llmClassified, params.scope, llmError,\n );\n\n // Build per-row structured results so capability-aware clients can\n // index into `structuredContent.results` rather than regex-scrape the\n // markdown table. The LLM tag wins when present; heuristic is the\n // always-on fallback.\n const llmTagsByRank = new Map<number, string>();\n // (When classification succeeds the source_type per-row is populated\n // inside buildClassifiedOutput via the entry.source_type field \u2014 but\n // we don't have a direct handle on it here without a refactor. The\n // heuristic alone covers the structuredContent shape correctly; the\n // LLM-tagged variant remains in the markdown body.)\n const results = buildStructuredResults(aggregation, llmTagsByRank);\n\n mcpLog('info', `Search completed: ${aggregation.rankedUrls.length} URLs, classified=${llmClassified}`, 'search');\n await reporter.log('info', `Search completed with ${aggregation.rankedUrls.length} URLs (classified: ${llmClassified})`);\n\n const footer = `\\n---\\n*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs${llmClassified ? 
' | LLM classified' : ''}*`;\n const fullMarkdown = markdown + footer;\n\n return toolSuccess(fullMarkdown, { results, metadata });\n } catch (error) {\n return buildWebSearchError(error, params, startTime);\n }\n}\n\nexport function registerWebSearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'web-search',\n title: 'Web Search',\n description:\n 'Fan out Google queries in parallel. One call carries up to 50 queries in a flat `queries` array \u2014 pack diverse facets (not paraphrases) into a single call. Call me AGGRESSIVELY across a session: 2\u20134 rounds is normal, 1 is underuse. After each pass, read `gaps[]` + `refine_queries[]` and fire another round with the new terms. Safe to call multiple times in parallel in the same turn for orthogonal subtopics. `scope`: `\"reddit\"` (server appends `site:reddit.com` + filters to post permalinks \u2014 use for sentiment / migration / lived experience), `\"web\"` default (spec / bug / pricing / CVE / API), `\"both\"` (fan each query across both \u2014 use when opinion-heavy AND needs official sources). Returns a tiered Markdown report (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis with `[rank]` citations + `## Gaps` + `## Suggested follow-up searches` tied to gap ids. Set `raw=true` to skip classification.',\n schema: webSearchParamsSchema,\n outputSchema: webSearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const reporter = createToolReporter(ctx, 'web-search');\n const result = await handleWebSearch(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Search failed' : 'Search complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
4
|
+
"sourcesContent": ["/**\n * Web Search Tool Handler\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n webSearchOutputSchema,\n webSearchParamsSchema,\n type WebSearchParams,\n type WebSearchOutput,\n} from '../schemas/web-search.js';\nimport { SearchClient } from '../clients/search.js';\nimport {\n aggregateAndRank,\n generateUnifiedOutput,\n} from '../utils/url-aggregator.js';\nimport {\n createLLMProcessor,\n classifySearchResults,\n suggestRefineQueriesForRawMode,\n type ClassificationEntry,\n type ClassificationResult,\n type RefineQuerySuggestion,\n} from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { classifySourceByUrl } from '../utils/source-type.js';\nimport {\n mcpLog,\n formatError,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\nimport { sanitizeSuggestion } from '../utils/sanitize.js';\n\n// --- Internal types ---\n\ninterface SearchAggregation {\n readonly rankedUrls: ReturnType<typeof aggregateAndRank>['rankedUrls'];\n readonly totalUniqueUrls: number;\n readonly frequencyThreshold: number;\n readonly thresholdNote?: string;\n}\n\ninterface SearchResponse {\n searches: Parameters<typeof aggregateAndRank>[0];\n totalQueries: number;\n}\n\n// --- Helpers ---\n\n/** Reddit post permalink: /r/{sub}/comments/{id}/ \u2014 drops subreddit\n * homepages, /rising, /new, /top, etc. so only post URLs reach the agent.\n * See mcp-revisions/tool-surface/02-extend-web-search-with-reddit-scope.md. 
*/\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+\\//i;\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\n\nfunction decorateQueriesForScope(queries: string[], scope: 'web' | 'reddit' | 'both'): string[] {\n if (scope === 'web') return queries;\n const reddited = queries.map((q) =>\n /\\bsite:reddit\\.com\\b/i.test(q) ? q : `${q} site:reddit.com`,\n );\n return scope === 'reddit' ? reddited : [...queries, ...reddited];\n}\n\nasync function executeSearches(queries: string[]): Promise<SearchResponse> {\n const client = new SearchClient();\n return client.searchMultiple(queries);\n}\n\nfunction filterScopedSearches(\n response: SearchResponse,\n scope: 'web' | 'reddit' | 'both',\n): SearchResponse {\n if (scope === 'web') return response;\n const filtered = response.searches.map((search) => ({\n ...search,\n results: search.results.filter((r) => {\n let host: string;\n try { host = new URL(r.link).hostname; } catch { return true; }\n // Non-reddit URLs pass through; reddit URLs must be post permalinks.\n if (!REDDIT_HOST.test(host)) return scope !== 'reddit';\n return REDDIT_POST_PERMALINK.test(r.link);\n }),\n }));\n return { ...response, searches: filtered };\n}\n\nfunction processResults(response: SearchResponse): {\n aggregation: SearchAggregation;\n} {\n const aggregation = aggregateAndRank(response.searches, 5);\n return { aggregation };\n}\n\n// --- Raw output (traditional unified ranked list) ---\n\nfunction buildRawOutput(\n queries: string[],\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n verbose: boolean = false,\n): string {\n return generateUnifiedOutput(\n aggregation.rankedUrls, queries, searches,\n aggregation.totalUniqueUrls,\n aggregation.frequencyThreshold, aggregation.thresholdNote,\n verbose,\n );\n}\n\nfunction buildSignalsSection(\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n totalQueries: number,\n): string {\n const coverageCount = searches.filter((search) => 
search.results.length >= 3).length;\n const lowYield = searches\n .filter((search) => search.results.length <= 1)\n .map((search) => `\"${search.query}\"`);\n const consensusCount = aggregation.rankedUrls.filter((url) => url.isConsensus).length;\n\n const lines = [\n '**Signals**',\n `- Coverage: ${coverageCount}/${totalQueries} queries returned \u22653 results`,\n `- Consensus URLs: ${consensusCount}`,\n ];\n\n if (lowYield.length > 0) {\n lines.push(`- Low-yield: ${lowYield.join(', ')}`);\n }\n\n return lines.join('\\n');\n}\n\nexport function buildSuggestedFollowUpsSection(\n refineQueries: Array<{ query: string; rationale?: string; gap_id?: number; gap_description?: string }> | undefined,\n): string {\n if (!refineQueries || refineQueries.length === 0) {\n return '';\n }\n\n const lines = ['## Suggested follow-up searches', ''];\n\n for (const item of refineQueries) {\n const query = sanitizeSuggestion(item.query ?? '');\n if (!query) continue;\n const rationale = sanitizeSuggestion(item.rationale ?? '');\n const gapTag = typeof item.gap_id === 'number'\n ? ` _(closes gap [${item.gap_id}])_`\n : item.gap_description\n ? ` _(${sanitizeSuggestion(item.gap_description)})_`\n : '';\n lines.push(rationale\n ? `- ${query} \u2014 ${rationale}${gapTag}`\n : `- ${query}${gapTag}`,\n );\n }\n\n return lines.length === 2 ? '' : lines.join('\\n');\n}\n\nexport function appendSignalsAndFollowUps(\n markdown: string,\n signalsSection: string,\n refineQueries: RefineQuerySuggestion[] | undefined,\n options: { includeSignals?: boolean } = {},\n): string {\n const includeSignals = options.includeSignals ?? 
false;\n const sections = [markdown];\n if (includeSignals && signalsSection) {\n sections.push('', '---', signalsSection);\n }\n const followUps = buildSuggestedFollowUpsSection(refineQueries);\n if (followUps) {\n sections.push('', followUps);\n }\n return sections.join('\\n');\n}\n\n// --- \"Start here\" section ---\n//\n// Surfaces the best 3-5 URLs at the top of the classified response so an agent\n// skimming the first screen sees them before tier tables. Deterministic: uses\n// existing `tier` + `rank` + `reason` from the classifier, no extra LLM call.\n//\n// Algorithm: take HIGHLY_RELEVANT by rank up to MAX_START_HERE; if fewer than\n// MIN_START_HERE, pad from top MAYBE_RELEVANT; skip entirely if no entries\n// above OTHER.\n\nconst MIN_START_HERE = 3;\nconst MAX_START_HERE = 5;\n\n/** Minimal structural shape \u2014 avoids coupling to private `RankedUrl` type. */\ninterface StartHereCandidate {\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n}\n\ninterface StartHereTiers {\n readonly high: readonly StartHereCandidate[];\n readonly maybe: readonly StartHereCandidate[];\n}\n\nexport function buildStartHereSection(\n tiers: StartHereTiers,\n entryByRank: Map<number, ClassificationEntry>,\n opts: { min?: number; max?: number } = {},\n): string {\n const min = opts.min ?? MIN_START_HERE;\n const max = opts.max ?? 
MAX_START_HERE;\n\n const picks: Array<{ candidate: StartHereCandidate; tier: 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' }> = [];\n\n for (const candidate of tiers.high) {\n if (picks.length >= max) break;\n picks.push({ candidate, tier: 'HIGHLY_RELEVANT' });\n }\n\n if (picks.length < min) {\n const target = Math.min(min, max);\n for (const candidate of tiers.maybe) {\n if (picks.length >= target) break;\n picks.push({ candidate, tier: 'MAYBE_RELEVANT' });\n }\n }\n\n if (picks.length === 0) return '';\n\n const lines: string[] = [];\n lines.push('## Start here \u2014 best candidates for your extract');\n picks.forEach((pick, i) => {\n const entry = entryByRank.get(pick.candidate.rank);\n const reason = entry?.reason && entry.reason.trim().length > 0 ? entry.reason : '\u2014';\n let domain: string;\n try {\n domain = new URL(pick.candidate.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = pick.candidate.url;\n }\n lines.push(\n `${i + 1}. **[${pick.candidate.title}](${pick.candidate.url})** \u2014 ${domain} \u2014 ${reason} *(${pick.tier}, rank ${pick.candidate.rank})*`,\n );\n });\n return lines.join('\\n');\n}\n\n// --- Classified output (3-tier LLM-classified table) ---\n\nfunction buildClassifiedOutput(\n classification: ClassificationResult,\n aggregation: SearchAggregation,\n extract: string,\n searches: SearchResponse['searches'],\n totalQueries: number,\n verbose: boolean = false,\n): string {\n const rankedUrls = aggregation.rankedUrls;\n\n // Build tier \u2192 entries mapping (keep url data alongside classifier metadata)\n const entryByRank = new Map(classification.results.map((r) => [r.rank, r]));\n\n const tiers = {\n high: [] as typeof rankedUrls,\n maybe: [] as typeof rankedUrls,\n other: [] as typeof rankedUrls,\n };\n\n for (const url of rankedUrls) {\n const entry = entryByRank.get(url.rank);\n const tier = entry?.tier;\n if (tier === 'HIGHLY_RELEVANT') {\n tiers.high.push(url);\n } else if (tier === 'MAYBE_RELEVANT') {\n 
tiers.maybe.push(url);\n } else {\n tiers.other.push(url);\n }\n }\n\n const lines: string[] = [];\n\n // Header with generated title, synthesis, and confidence\n lines.push(`## ${classification.title}`);\n lines.push(`> Looking for: ${extract}`);\n lines.push(`> ${totalQueries} queries \u2192 ${rankedUrls.length} URLs \u2192 ${tiers.high.length} highly relevant, ${tiers.maybe.length} possibly relevant`);\n if (classification.confidence) {\n const confReason = classification.confidence_reason ? ` \u2014 ${classification.confidence_reason}` : '';\n lines.push(`> Confidence: \\`${classification.confidence}\\`${confReason}`);\n }\n lines.push('');\n\n // \"Start here\" block: surface the top 3-5 URLs above the synthesis so an\n // agent skimming the first screen sees scrape candidates before prose.\n const startHere = buildStartHereSection(\n { high: tiers.high, maybe: tiers.maybe },\n entryByRank,\n );\n if (startHere) {\n lines.push(startHere);\n lines.push('');\n }\n\n lines.push(`**Summary:** ${classification.synthesis}`);\n lines.push('');\n\n // Helper: render one row with optional source_type + reason\n const renderRichRow = (url: typeof rankedUrls[number]): string => {\n const entry = entryByRank.get(url.rank);\n const coveragePct = Math.round(url.coverageRatio * 100);\n const seenIn = `${url.frequency}/${totalQueries} (${coveragePct}%)`;\n const sourceType = entry?.source_type ? `\\`${entry.source_type}\\`` : '\u2014';\n const reason = entry?.reason ? 
entry.reason.replace(/\\|/g, '\\\\|') : '\u2014';\n return `| ${url.rank} | [${url.title}](${url.url}) | ${sourceType} | ${seenIn} | ${reason} |`;\n };\n\n // Highly Relevant tier\n if (tiers.high.length > 0) {\n lines.push(`### Highly Relevant (${tiers.high.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.high) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Maybe Relevant tier\n if (tiers.maybe.length > 0) {\n lines.push(`### Maybe Relevant (${tiers.maybe.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.maybe) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Other tier \u2014 with query attribution\n if (tiers.other.length > 0) {\n lines.push(`### Other Results (${tiers.other.length})`);\n lines.push('| # | URL | Source | Score | Queries |');\n lines.push('|---|-----|--------|-------|---------|');\n for (const url of tiers.other) {\n const entry = entryByRank.get(url.rank);\n const queryList = url.queries.map((q) => `\"${q}\"`).join(', ');\n const sourceType = entry?.source_type ? 
`\\`${entry.source_type}\\`` : '\u2014';\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n lines.push(`| ${url.rank} | ${domain} | ${sourceType} | ${url.score.toFixed(1)} | ${queryList} |`);\n }\n lines.push('');\n }\n\n // Signals block is gated behind verbose \u2014 it duplicates info already\n // present in the per-row metadata for callers who care.\n // See: docs/code-review/context/05-output-formatting-patterns.md.\n if (verbose) {\n lines.push(buildSignalsSection(aggregation, searches, totalQueries));\n }\n\n // Gaps section \u2014 what the current results don't answer\n if (classification.gaps && classification.gaps.length > 0) {\n lines.push('');\n lines.push('## Gaps');\n for (const gap of classification.gaps) {\n lines.push(`- **[${gap.id}]** ${gap.description}`);\n }\n }\n\n const followUps = buildSuggestedFollowUpsSection(classification.refine_queries);\n if (followUps) {\n lines.push('');\n lines.push(followUps);\n }\n\n return lines.join('\\n');\n}\n\n// --- Metadata builder ---\n\nfunction buildMetadata(\n aggregation: SearchAggregation,\n executionTime: number,\n totalQueries: number,\n searches: SearchResponse['searches'],\n llmClassified: boolean,\n scope: 'web' | 'reddit' | 'both',\n llmError?: string,\n) {\n const coverageSummary = searches.map(s => {\n let topDomain: string | undefined;\n const topResult = s.results[0];\n if (topResult) {\n try { topDomain = new URL(topResult.link).hostname.replace(/^www\\./, ''); } catch { /* ignore */ }\n }\n return { query: s.query, result_count: s.results.length, top_url: topDomain };\n });\n const lowYieldQueries = searches\n .filter(s => s.results.length <= 1)\n .map(s => s.query);\n\n return {\n total_items: totalQueries,\n successful: aggregation.rankedUrls.length,\n failed: totalQueries - searches.filter(s => s.results.length > 0).length,\n execution_time_ms: executionTime,\n llm_classified: llmClassified,\n scope,\n 
...(llmError ? { llm_error: llmError } : {}),\n coverage_summary: coverageSummary,\n ...(lowYieldQueries.length > 0 ? { low_yield_queries: lowYieldQueries } : {}),\n };\n}\n\nfunction buildStructuredResults(\n aggregation: SearchAggregation,\n llmTagsByRank?: Map<number, string>,\n): Array<{\n rank: number;\n url: string;\n title: string;\n snippet: string;\n source_type: 'reddit' | 'github' | 'docs' | 'blog' | 'paper' | 'qa' | 'cve' | 'news' | 'video' | 'web';\n score: number;\n seen_in: number;\n best_position: number;\n}> {\n return aggregation.rankedUrls.map((row) => {\n // LLM tag wins when present; heuristic is the always-on fallback. See:\n // mcp-revisions/output-shaping/06-source-type-tagging-without-llm.md.\n const llmTag = llmTagsByRank?.get(row.rank);\n const heuristic = classifySourceByUrl(row.url);\n return {\n rank: row.rank,\n url: row.url,\n title: row.title,\n snippet: row.snippet,\n source_type: ((llmTag as typeof heuristic) ?? heuristic),\n score: Number(row.score.toFixed(2)),\n seen_in: row.frequency,\n best_position: row.bestPosition,\n };\n });\n}\n\n// --- Error builder ---\n\nfunction buildWebSearchError(\n error: unknown,\n params: WebSearchParams,\n startTime: number,\n): ToolExecutionResult<WebSearchOutput> {\n const structuredError = classifyError(error);\n const executionTime = Date.now() - startTime;\n\n mcpLog('error', `web-search: ${structuredError.message}`, 'search');\n\n const errorContent = formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'web-search',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n alternatives: [\n 'web-search(queries=[\"topic recommendations\"], extract=\"...\", scope: \"reddit\") \u2014 Reddit-only post permalinks via the same backend',\n 'scrape-links(urls=[...], extract=\"...\") \u2014 if you have URLs from prior steps, scrape them now',\n ],\n });\n\n return toolFailure(\n `${errorContent}\\n\\nExecution time: 
${formatDuration(executionTime)}\\nQueries: ${params.queries.length}`,\n );\n}\n\n// --- Main handler ---\n\nexport async function handleWebSearch(\n params: WebSearchParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<WebSearchOutput>> {\n const startTime = Date.now();\n\n try {\n const effectiveQueries = decorateQueriesForScope(params.queries, params.scope);\n if (params.scope !== 'web') {\n mcpLog('info', `Searching scope=${params.scope}: ${params.queries.length} input queries \u2192 ${effectiveQueries.length} dispatched`, 'search');\n } else {\n mcpLog('info', `Searching for ${params.queries.length} query/queries`, 'search');\n }\n await reporter.log('info', `Searching for ${effectiveQueries.length} query/queries (scope=${params.scope})`);\n await reporter.progress(15, 100, 'Submitting search queries');\n\n const rawResponse = await executeSearches(effectiveQueries);\n const response = filterScopedSearches(rawResponse, params.scope);\n await reporter.progress(50, 100, 'Collected search results');\n\n const { aggregation } = processResults(response);\n await reporter.log(\n 'info',\n `Collected ${aggregation.totalUniqueUrls} unique URLs across ${response.totalQueries} queries`,\n );\n\n // Decide: raw output or LLM classification\n const useRaw = params.raw;\n const llmProcessor = createLLMProcessor();\n\n let markdown: string;\n let llmClassified = false;\n let llmError: string | undefined;\n\n if (useRaw || !llmProcessor) {\n // Raw path: traditional unified ranked list\n if (!useRaw && !llmProcessor) {\n llmError = 'LLM unavailable (LLM_API_KEY / LLM_BASE_URL / LLM_MODEL not set). 
Falling back to raw output.';\n mcpLog('warning', llmError, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', 'llm_classifier_unreachable: planner not configured; raw ranked list returned');\n }\n let rawRefineQueries: RefineQuerySuggestion[] | undefined;\n if (useRaw && llmProcessor) {\n const refineResult = await suggestRefineQueriesForRawMode(\n aggregation.rankedUrls,\n params.extract,\n params.queries,\n llmProcessor,\n );\n rawRefineQueries = refineResult.result;\n }\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n rawRefineQueries,\n { includeSignals: params.verbose },\n );\n await reporter.progress(80, 100, 'Ranking search results');\n } else {\n // LLM classification path\n await reporter.progress(65, 100, 'Classifying results by relevance');\n const classification = await classifySearchResults(\n aggregation.rankedUrls,\n params.extract,\n response.totalQueries,\n llmProcessor,\n params.queries,\n );\n\n if (classification.result) {\n markdown = buildClassifiedOutput(\n classification.result, aggregation, params.extract, response.searches, response.totalQueries, params.verbose,\n );\n llmClassified = true;\n await reporter.progress(85, 100, 'Formatted classified results');\n } else {\n // Classification failed \u2014 fall back to raw\n llmError = classification.error ?? 
'Unknown classification error';\n mcpLog('warning', `Classification failed, falling back to raw: ${llmError}`, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', `llm_classifier_unreachable: ${llmError}`);\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n undefined,\n { includeSignals: params.verbose },\n );\n await reporter.progress(85, 100, 'Classification failed, using raw output');\n }\n }\n\n const executionTime = Date.now() - startTime;\n const metadata = buildMetadata(\n aggregation, executionTime, response.totalQueries, response.searches, llmClassified, params.scope, llmError,\n );\n\n // Build per-row structured results so capability-aware clients can\n // index into `structuredContent.results` rather than regex-scrape the\n // markdown table. The LLM tag wins when present; heuristic is the\n // always-on fallback.\n const llmTagsByRank = new Map<number, string>();\n // (When classification succeeds the source_type per-row is populated\n // inside buildClassifiedOutput via the entry.source_type field \u2014 but\n // we don't have a direct handle on it here without a refactor. The\n // heuristic alone covers the structuredContent shape correctly; the\n // LLM-tagged variant remains in the markdown body.)\n const results = buildStructuredResults(aggregation, llmTagsByRank);\n\n mcpLog('info', `Search completed: ${aggregation.rankedUrls.length} URLs, classified=${llmClassified}`, 'search');\n await reporter.log('info', `Search completed with ${aggregation.rankedUrls.length} URLs (classified: ${llmClassified})`);\n\n const footer = `\\n---\\n*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs${llmClassified ? 
' | LLM classified' : ''}*`;\n const fullMarkdown = markdown + footer;\n\n return toolSuccess(fullMarkdown, { results, metadata });\n } catch (error) {\n return buildWebSearchError(error, params, startTime);\n }\n}\n\nexport function registerWebSearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'web-search',\n title: 'Web Search',\n description:\n 'Fan out Google queries in parallel. One call carries up to 50 queries in a flat `queries` array \u2014 pack diverse facets (not paraphrases) into a single call. Call me AGGRESSIVELY across a session: 2\u20134 rounds is normal, 1 is underuse. After each pass, read `gaps[]` + `refine_queries[]` and fire another round with the new terms. Safe to call multiple times in parallel in the same turn for orthogonal subtopics. `scope`: `\"reddit\"` (server appends `site:reddit.com` + filters to post permalinks \u2014 use for sentiment / migration / lived experience), `\"web\"` default (spec / bug / pricing / CVE / API), `\"both\"` (fan each query across both \u2014 use when opinion-heavy AND needs official sources). Returns a tiered Markdown report (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis with `[rank]` citations + `## Gaps` + `## Suggested follow-up searches` tied to gap ids. Set `raw=true` to skip classification.',\n schema: webSearchParamsSchema,\n outputSchema: webSearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const reporter = createToolReporter(ctx, 'web-search');\n const result = await handleWebSearch(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Search failed' : 'Search complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
5
5
|
"mappings": "AAOA,SAAS,iBAAiB,4BAA4B;AACtD;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,oBAAoB;AAC7B;AAAA,EACE;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OAIK;AACP,SAAS,qBAAqB;AAC9B,SAAS,2BAA2B;AACpC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AACP,SAAS,0BAA0B;AAqBnC,MAAM,wBAAwB;AAC9B,MAAM,cAAc;AAEpB,SAAS,wBAAwB,SAAmB,OAA4C;AAC9F,MAAI,UAAU,MAAO,QAAO;AAC5B,QAAM,WAAW,QAAQ;AAAA,IAAI,CAAC,MAC5B,wBAAwB,KAAK,CAAC,IAAI,IAAI,GAAG,CAAC;AAAA,EAC5C;AACA,SAAO,UAAU,WAAW,WAAW,CAAC,GAAG,SAAS,GAAG,QAAQ;AACjE;AAEA,eAAe,gBAAgB,SAA4C;AACzE,QAAM,SAAS,IAAI,aAAa;AAChC,SAAO,OAAO,eAAe,OAAO;AACtC;AAEA,SAAS,qBACP,UACA,OACgB;AAChB,MAAI,UAAU,MAAO,QAAO;AAC5B,QAAM,WAAW,SAAS,SAAS,IAAI,CAAC,YAAY;AAAA,IAClD,GAAG;AAAA,IACH,SAAS,OAAO,QAAQ,OAAO,CAAC,MAAM;AACpC,UAAI;AACJ,UAAI;AAAE,eAAO,IAAI,IAAI,EAAE,IAAI,EAAE;AAAA,MAAU,QAAQ;AAAE,eAAO;AAAA,MAAM;AAE9D,UAAI,CAAC,YAAY,KAAK,IAAI,EAAG,QAAO,UAAU;AAC9C,aAAO,sBAAsB,KAAK,EAAE,IAAI;AAAA,IAC1C,CAAC;AAAA,EACH,EAAE;AACF,SAAO,EAAE,GAAG,UAAU,UAAU,SAAS;AAC3C;AAEA,SAAS,eAAe,UAEtB;AACA,QAAM,cAAc,iBAAiB,SAAS,UAAU,CAAC;AACzD,SAAO,EAAE,YAAY;AACvB;AAIA,SAAS,eACP,SACA,aACA,UACA,UAAmB,OACX;AACR,SAAO;AAAA,IACL,YAAY;AAAA,IAAY;AAAA,IAAS;AAAA,IACjC,YAAY;AAAA,IACZ,YAAY;AAAA,IAAoB,YAAY;AAAA,IAC5C;AAAA,EACF;AACF;AAEA,SAAS,oBACP,aACA,UACA,cACQ;AACR,QAAM,gBAAgB,SAAS,OAAO,CAAC,WAAW,OAAO,QAAQ,UAAU,CAAC,EAAE;AAC9E,QAAM,WAAW,SACd,OAAO,CAAC,WAAW,OAAO,QAAQ,UAAU,CAAC,EAC7C,IAAI,CAAC,WAAW,IAAI,OAAO,KAAK,GAAG;AACtC,QAAM,iBAAiB,YAAY,WAAW,OAAO,CAAC,QAAQ,IAAI,WAAW,EAAE;AAE/E,QAAM,QAAQ;AAAA,IACZ;AAAA,IACA,eAAe,aAAa,IAAI,YAAY;AAAA,IAC5C,qBAAqB,cAAc;AAAA,EACrC;AAEA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,gBAAgB,SAAS,KAAK,IAAI,CAAC,EAAE;AAAA,EAClD;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEO,SAAS,+BACd,eACQ;AACR,MAAI,CAAC,iBAAiB,cAAc,WAAW,GAAG;AAChD,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,CAAC,mCAAmC,EAAE;AAEpD,aAAW,QAAQ,eAAe;AAChC,UAAM,QAAQ,mBAAmB,KAAK,SAAS,EAAE;AACjD,QAAI,CAAC,MAAO;AACZ,UAAM,YAAY,mBAAmB,KAAK,aAAa,EAAE;AACzD,UAAM,SAAS,OAAO,KAAK,WAAW,
WAClC,kBAAkB,KAAK,MAAM,QAC7B,KAAK,kBACH,MAAM,mBAAmB,KAAK,eAAe,CAAC,OAC9C;AACN,UAAM;AAAA,MAAK,YACP,KAAK,KAAK,WAAM,SAAS,GAAG,MAAM,KAClC,KAAK,KAAK,GAAG,MAAM;AAAA,IACvB;AAAA,EACF;AAEA,SAAO,MAAM,WAAW,IAAI,KAAK,MAAM,KAAK,IAAI;AAClD;AAEO,SAAS,0BACd,UACA,gBACA,eACA,UAAwC,CAAC,GACjC;AACR,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,WAAW,CAAC,QAAQ;AAC1B,MAAI,kBAAkB,gBAAgB;AACpC,aAAS,KAAK,IAAI,OAAO,cAAc;AAAA,EACzC;AACA,QAAM,YAAY,+BAA+B,aAAa;AAC9D,MAAI,WAAW;AACb,aAAS,KAAK,IAAI,SAAS;AAAA,EAC7B;AACA,SAAO,SAAS,KAAK,IAAI;AAC3B;AAYA,MAAM,iBAAiB;AACvB,MAAM,iBAAiB;AAchB,SAAS,sBACd,OACA,aACA,OAAuC,CAAC,GAChC;AACR,QAAM,MAAM,KAAK,OAAO;AACxB,QAAM,MAAM,KAAK,OAAO;AAExB,QAAM,QAA8F,CAAC;AAErG,aAAW,aAAa,MAAM,MAAM;AAClC,QAAI,MAAM,UAAU,IAAK;AACzB,UAAM,KAAK,EAAE,WAAW,MAAM,kBAAkB,CAAC;AAAA,EACnD;AAEA,MAAI,MAAM,SAAS,KAAK;AACtB,UAAM,SAAS,KAAK,IAAI,KAAK,GAAG;AAChC,eAAW,aAAa,MAAM,OAAO;AACnC,UAAI,MAAM,UAAU,OAAQ;AAC5B,YAAM,KAAK,EAAE,WAAW,MAAM,iBAAiB,CAAC;AAAA,IAClD;AAAA,EACF;AAEA,MAAI,MAAM,WAAW,EAAG,QAAO;AAE/B,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,uDAAkD;AAC7D,QAAM,QAAQ,CAAC,MAAM,MAAM;AACzB,UAAM,QAAQ,YAAY,IAAI,KAAK,UAAU,IAAI;AACjD,UAAM,SAAS,OAAO,UAAU,MAAM,OAAO,KAAK,EAAE,SAAS,IAAI,MAAM,SAAS;AAChF,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,KAAK,UAAU,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACpE,QAAQ;AACN,eAAS,KAAK,UAAU;AAAA,IAC1B;AACA,UAAM;AAAA,MACJ,GAAG,IAAI,CAAC,QAAQ,KAAK,UAAU,KAAK,KAAK,KAAK,UAAU,GAAG,cAAS,MAAM,WAAM,MAAM,MAAM,KAAK,IAAI,UAAU,KAAK,UAAU,IAAI;AAAA,IACpI;AAAA,EACF,CAAC;AACD,SAAO,MAAM,KAAK,IAAI;AACxB;AAIA,SAAS,sBACP,gBACA,aACA,SACA,UACA,cACA,UAAmB,OACX;AACR,QAAM,aAAa,YAAY;AAG/B,QAAM,cAAc,IAAI,IAAI,eAAe,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;AAE1E,QAAM,QAAQ;AAAA,IACZ,MAAM,CAAC;AAAA,IACP,OAAO,CAAC;AAAA,IACR,OAAO,CAAC;AAAA,EACV;AAEA,aAAW,OAAO,YAAY;AAC5B,UAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,UAAM,OAAO,OAAO;AACpB,QAAI,SAAS,mBAAmB;AAC9B,YAAM,KAAK,KAAK,GAAG;AAAA,IACrB,WAAW,SAAS,kBAAkB;AACpC,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB,OAAO;AACL,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB;AAAA,EACF;AAEA,QAAM,QAAkB,CAAC;AAGzB,QAAM,KAAK,MAAM,eAAe,K
AAK,EAAE;AACvC,QAAM,KAAK,kBAAkB,OAAO,EAAE;AACtC,QAAM,KAAK,KAAK,YAAY,mBAAc,WAAW,MAAM,gBAAW,MAAM,KAAK,MAAM,qBAAqB,MAAM,MAAM,MAAM,oBAAoB;AAClJ,MAAI,eAAe,YAAY;AAC7B,UAAM,aAAa,eAAe,oBAAoB,WAAM,eAAe,iBAAiB,KAAK;AACjG,UAAM,KAAK,mBAAmB,eAAe,UAAU,KAAK,UAAU,EAAE;AAAA,EAC1E;AACA,QAAM,KAAK,EAAE;AAIb,QAAM,YAAY;AAAA,IAChB,EAAE,MAAM,MAAM,MAAM,OAAO,MAAM,MAAM;AAAA,IACvC;AAAA,EACF;AACA,MAAI,WAAW;AACb,UAAM,KAAK,SAAS;AACpB,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,gBAAgB,eAAe,SAAS,EAAE;AACrD,QAAM,KAAK,EAAE;AAGb,QAAM,gBAAgB,CAAC,QAA2C;AAChE,UAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,UAAM,cAAc,KAAK,MAAM,IAAI,gBAAgB,GAAG;AACtD,UAAM,SAAS,GAAG,IAAI,SAAS,IAAI,YAAY,KAAK,WAAW;AAC/D,UAAM,aAAa,OAAO,cAAc,KAAK,MAAM,WAAW,OAAO;AACrE,UAAM,SAAS,OAAO,SAAS,MAAM,OAAO,QAAQ,OAAO,KAAK,IAAI;AACpE,WAAO,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,KAAK,IAAI,GAAG,OAAO,UAAU,MAAM,MAAM,MAAM,MAAM;AAAA,EAC3F;AAGA,MAAI,MAAM,KAAK,SAAS,GAAG;AACzB,UAAM,KAAK,wBAAwB,MAAM,KAAK,MAAM,GAAG;AACvD,UAAM,KAAK,sCAAsC;AACjD,UAAM,KAAK,sCAAsC;AACjD,eAAW,OAAO,MAAM,KAAM,OAAM,KAAK,cAAc,GAAG,CAAC;AAC3D,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,uBAAuB,MAAM,MAAM,MAAM,GAAG;AACvD,UAAM,KAAK,sCAAsC;AACjD,UAAM,KAAK,sCAAsC;AACjD,eAAW,OAAO,MAAM,MAAO,OAAM,KAAK,cAAc,GAAG,CAAC;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,sBAAsB,MAAM,MAAM,MAAM,GAAG;AACtD,UAAM,KAAK,wCAAwC;AACnD,UAAM,KAAK,wCAAwC;AACnD,eAAW,OAAO,MAAM,OAAO;AAC7B,YAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,YAAM,YAAY,IAAI,QAAQ,IAAI,CAAC,MAAM,IAAI,CAAC,GAAG,EAAE,KAAK,IAAI;AAC5D,YAAM,aAAa,OAAO,cAAc,KAAK,MAAM,WAAW,OAAO;AACrE,UAAI;AACJ,UAAI;AACF,iBAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MACzD,QAAQ;AACN,iBAAS,IAAI;AAAA,MACf;AACA,YAAM,KAAK,KAAK,IAAI,IAAI,MAAM,MAAM,MAAM,UAAU,MAAM,IAAI,MAAM,QAAQ,CAAC,CAAC,MAAM,SAAS,IAAI;AAAA,IACnG;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAKA,MAAI,SAAS;AACX,UAAM,KAAK,oBAAoB,aAAa,UAAU,YAAY,CAAC;AAAA,EACrE;AAGA,MAAI,eAAe,QAAQ,eAAe,KAAK,SAAS,GAAG;AACzD,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,SAAS;AACpB,eAAW,OAAO,eAAe,MAAM;AACrC,YAAM,KAAK,QAAQ,IA
AI,EAAE,OAAO,IAAI,WAAW,EAAE;AAAA,IACnD;AAAA,EACF;AAEA,QAAM,YAAY,+BAA+B,eAAe,cAAc;AAC9E,MAAI,WAAW;AACb,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,SAAS;AAAA,EACtB;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAIA,SAAS,cACP,aACA,eACA,cACA,UACA,eACA,OACA,UACA;AACA,QAAM,kBAAkB,SAAS,IAAI,OAAK;AACxC,QAAI;AACJ,UAAM,YAAY,EAAE,QAAQ,CAAC;AAC7B,QAAI,WAAW;AACb,UAAI;AAAE,oBAAY,IAAI,IAAI,UAAU,IAAI,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IACnG;AACA,WAAO,EAAE,OAAO,EAAE,OAAO,cAAc,EAAE,QAAQ,QAAQ,SAAS,UAAU;AAAA,EAC9E,CAAC;AACD,QAAM,kBAAkB,SACrB,OAAO,OAAK,EAAE,QAAQ,UAAU,CAAC,EACjC,IAAI,OAAK,EAAE,KAAK;AAEnB,SAAO;AAAA,IACL,aAAa;AAAA,IACb,YAAY,YAAY,WAAW;AAAA,IACnC,QAAQ,eAAe,SAAS,OAAO,OAAK,EAAE,QAAQ,SAAS,CAAC,EAAE;AAAA,IAClE,mBAAmB;AAAA,IACnB,gBAAgB;AAAA,IAChB;AAAA,IACA,GAAI,WAAW,EAAE,WAAW,SAAS,IAAI,CAAC;AAAA,IAC1C,kBAAkB;AAAA,IAClB,GAAI,gBAAgB,SAAS,IAAI,EAAE,mBAAmB,gBAAgB,IAAI,CAAC;AAAA,EAC7E;AACF;AAEA,SAAS,uBACP,aACA,eAUC;AACD,SAAO,YAAY,WAAW,IAAI,CAAC,QAAQ;AAGzC,UAAM,SAAS,eAAe,IAAI,IAAI,IAAI;AAC1C,UAAM,YAAY,oBAAoB,IAAI,GAAG;AAC7C,WAAO;AAAA,MACL,MAAM,IAAI;AAAA,MACV,KAAK,IAAI;AAAA,MACT,OAAO,IAAI;AAAA,MACX,SAAS,IAAI;AAAA,MACb,aAAe,UAA+B;AAAA,MAC9C,OAAO,OAAO,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,MAClC,SAAS,IAAI;AAAA,MACb,eAAe,IAAI;AAAA,IACrB;AAAA,EACF,CAAC;AACH;AAIA,SAAS,oBACP,OACA,QACA,WACsC;AACtC,QAAM,kBAAkB,cAAc,KAAK;AAC3C,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC,SAAO,SAAS,eAAe,gBAAgB,OAAO,IAAI,QAAQ;AAElE,QAAM,eAAe,YAAY;AAAA,IAC/B,MAAM,gBAAgB;AAAA,IACtB,SAAS,gBAAgB;AAAA,IACzB,WAAW,gBAAgB;AAAA,IAC3B,UAAU;AAAA,IACV,UAAU,CAAC,wCAAwC;AAAA,IACnD,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL,GAAG,YAAY;AAAA;AAAA,kBAAuB,eAAe,aAAa,CAAC;AAAA,WAAc,OAAO,QAAQ,MAAM;AAAA,EACxG;AACF;AAIA,eAAsB,gBACpB,QACA,WAAyB,eACsB;AAC/C,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI;AACF,UAAM,mBAAmB,wBAAwB,OAAO,SAAS,OAAO,KAAK;AAC7E,QAAI,OAAO,UAAU,OAAO;AAC1B,aAAO,QAAQ,mBAAmB,OAAO,KAAK,KAAK,OAAO,QAAQ,MAAM,yBAAoB,iBAAiB,MAAM,eAAe,QAAQ;AAAA,IAC5I,OAAO;AACL,aAAO,QAAQ,iBAAiB,OAAO,QAAQ,MAAM,kBAAkB,QAAQ;AAAA,IACjF;AACA,UAAM,SAAS,IAAI,QAAQ,iBAAiB,
iBAAiB,MAAM,yBAAyB,OAAO,KAAK,GAAG;AAC3G,UAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAE5D,UAAM,cAAc,MAAM,gBAAgB,gBAAgB;AAC1D,UAAM,WAAW,qBAAqB,aAAa,OAAO,KAAK;AAC/D,UAAM,SAAS,SAAS,IAAI,KAAK,0BAA0B;AAE3D,UAAM,EAAE,YAAY,IAAI,eAAe,QAAQ;AAC/C,UAAM,SAAS;AAAA,MACb;AAAA,MACA,aAAa,YAAY,eAAe,uBAAuB,SAAS,YAAY;AAAA,IACtF;AAGA,UAAM,SAAS,OAAO;AACtB,UAAM,eAAe,mBAAmB;AAExC,QAAI;AACJ,QAAI,gBAAgB;AACpB,QAAI;AAEJ,QAAI,UAAU,CAAC,cAAc;AAE3B,UAAI,CAAC,UAAU,CAAC,cAAc;AAC5B,mBAAW;AACX,eAAO,WAAW,UAAU,QAAQ;AAEpC,cAAM,SAAS,IAAI,WAAW,8EAA8E;AAAA,MAC9G;AACA,UAAI;AACJ,UAAI,UAAU,cAAc;AAC1B,cAAM,eAAe,MAAM;AAAA,UACzB,YAAY;AAAA,UACZ,OAAO;AAAA,UACP,OAAO;AAAA,UACP;AAAA,QACF;AACA,2BAAmB,aAAa;AAAA,MAClC;AACA,iBAAW;AAAA,QACT,eAAe,OAAO,SAAS,aAAa,SAAS,UAAU,OAAO,OAAO;AAAA,QAC7E,oBAAoB,aAAa,SAAS,UAAU,SAAS,YAAY;AAAA,QACzE;AAAA,QACA,EAAE,gBAAgB,OAAO,QAAQ;AAAA,MACnC;AACA,YAAM,SAAS,SAAS,IAAI,KAAK,wBAAwB;AAAA,IAC3D,OAAO;AAEL,YAAM,SAAS,SAAS,IAAI,KAAK,kCAAkC;AACnE,YAAM,iBAAiB,MAAM;AAAA,QAC3B,YAAY;AAAA,QACZ,OAAO;AAAA,QACP,SAAS;AAAA,QACT;AAAA,QACA,OAAO;AAAA,MACT;AAEA,UAAI,eAAe,QAAQ;AACzB,mBAAW;AAAA,UACT,eAAe;AAAA,UAAQ;AAAA,UAAa,OAAO;AAAA,UAAS,SAAS;AAAA,UAAU,SAAS;AAAA,UAAc,OAAO;AAAA,QACvG;AACA,wBAAgB;AAChB,cAAM,SAAS,SAAS,IAAI,KAAK,8BAA8B;AAAA,MACjE,OAAO;AAEL,mBAAW,eAAe,SAAS;AACnC,eAAO,WAAW,+CAA+C,QAAQ,IAAI,QAAQ;AAErF,cAAM,SAAS,IAAI,WAAW,+BAA+B,QAAQ,EAAE;AACvE,mBAAW;AAAA,UACT,eAAe,OAAO,SAAS,aAAa,SAAS,UAAU,OAAO,OAAO;AAAA,UAC7E,oBAAoB,aAAa,SAAS,UAAU,SAAS,YAAY;AAAA,UACzE;AAAA,UACA,EAAE,gBAAgB,OAAO,QAAQ;AAAA,QACnC;AACA,cAAM,SAAS,SAAS,IAAI,KAAK,yCAAyC;AAAA,MAC5E;AAAA,IACF;AAEA,UAAM,gBAAgB,KAAK,IAAI,IAAI;AACnC,UAAM,WAAW;AAAA,MACf;AAAA,MAAa;AAAA,MAAe,SAAS;AAAA,MAAc,SAAS;AAAA,MAAU;AAAA,MAAe,OAAO;AAAA,MAAO;AAAA,IACrG;AAMA,UAAM,gBAAgB,oBAAI,IAAoB;AAM9C,UAAM,UAAU,uBAAuB,aAAa,aAAa;AAEjE,WAAO,QAAQ,qBAAqB,YAAY,WAAW,MAAM,qBAAqB,aAAa,IAAI,QAAQ;AAC/G,UAAM,SAAS,IAAI,QAAQ,yBAAyB,YAAY,WAAW,MAAM,sBAAsB,aAAa,GAAG;AAEvH,UAAM,SAAS;AAAA;AAAA,GAAW,eAAe,aAAa,CAAC,MAAM,YAAY,eAAe,eAAe,gBAAgB,sBAAsB,EAAE;AAC/I,UAAM,eAAe,WAAW;AAEhC,WAAO,YAAY,cAAc,EAAE,SAAS,SAAS,
CAAC;AAAA,EACxD,SAAS,OAAO;AACd,WAAO,oBAAoB,OAAO,QAAQ,SAAS;AAAA,EACrD;AACF;AAEO,SAAS,sBAAsB,QAAyB;AAC7D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,QAAQ;AAC7B,eAAO,eAAe,YAAY,qBAAqB,QAAQ,CAAC,CAAC;AAAA,MACnE;AAEA,YAAM,WAAW,mBAAmB,KAAK,YAAY;AACrD,YAAM,SAAS,MAAM,gBAAgB,MAAM,QAAQ;AAEnD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-researchpowerpack",
|
|
3
|
-
"version": "6.0.
|
|
3
|
+
"version": "6.0.4",
|
|
4
4
|
"description": "HTTP-first MCP research server: start-research (goal-tailored brief), web-search (with Reddit scope), scrape-links (auto-detects Reddit URLs) — built on mcp-use.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -24,7 +24,6 @@
|
|
|
24
24
|
"web-scraping",
|
|
25
25
|
"claude",
|
|
26
26
|
"anthropic",
|
|
27
|
-
"openrouter",
|
|
28
27
|
"scraping",
|
|
29
28
|
"research-powerpack",
|
|
30
29
|
"http-mcp"
|