mcp-researchpowerpack 6.0.1 → 6.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -102,6 +102,8 @@ function buildSessionConfig() {
 }
 function buildHealthPayload(server, startedAt) {
   const llm = getLLMHealth();
+  const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;
+  const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? null : llm.lastExtractorOk;
   return {
     status: "ok",
     name: SERVER.NAME,
@@ -109,14 +111,18 @@ function buildHealthPayload(server, startedAt) {
     transport: "http",
     uptime_seconds: Math.floor((Date.now() - startedAt) / 1e3),
     active_sessions: server.getActiveSessions().length,
-    llm_planner_ok: llm.lastPlannerOk,
-    llm_extractor_ok: llm.lastExtractorOk,
+    llm_planner_ok: plannerOkForHealth,
+    llm_extractor_ok: extractorOkForHealth,
     llm_planner_checked_at: llm.lastPlannerCheckedAt,
     llm_extractor_checked_at: llm.lastExtractorCheckedAt,
     llm_planner_error: llm.lastPlannerError,
     llm_extractor_error: llm.lastExtractorError,
     planner_configured: llm.plannerConfigured,
     extractor_configured: llm.extractorConfigured,
+    // Counter surfacing lets operators diagnose gate behavior from outside
+    // the process (see src/tools/start-research.ts for the gate semantics).
+    consecutive_planner_failures: llm.consecutivePlannerFailures,
+    consecutive_extractor_failures: llm.consecutiveExtractorFailures,
     timestamp: (/* @__PURE__ */ new Date()).toISOString()
   };
 }
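Net effect of the two hunks above: `llm_planner_ok` and `llm_extractor_ok` become tri-state (`null` until the first real probe, then the last probe's boolean), and the consecutive-failure counters expose the same signal the in-process gate uses. A minimal sketch of an external monitor consuming the new fields, assuming only the payload shape shown in this diff; the helper name and the `>= 3` threshold are hypothetical, not part of the package:

```ts
// Fields of interest from GET /health, per the diff above.
type LLMHealthFields = {
  llm_planner_ok: boolean | null; // null = planner never probed yet
  consecutive_planner_failures: number;
};

async function classifyPlannerHealth(baseUrl: string): Promise<string> {
  const res = await fetch(`${baseUrl}/health`);
  const health = (await res.json()) as LLMHealthFields;
  if (health.llm_planner_ok === null) return "unknown (not yet exercised)";
  if (health.llm_planner_ok) return "ok";
  // The counter distinguishes a one-off blip from a sustained outage.
  return health.consecutive_planner_failures >= 3 ? "down" : "flaky";
}
```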
package/dist/index.js.map CHANGED
@@ -1,7 +1,7 @@
 {
   "version": 3,
   "sources": ["../index.ts"],
- "sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack-http' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? '', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: llm.lastPlannerOk,\n llm_extractor_ok: llm.lastExtractorOk,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. ' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. 
The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model:\n process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? null,\n extractor_model:\n process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? `http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? 
error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
- "mappings": ";AAGA,IAAI,CAAC,QAAQ,IAAI,oBAAoB;AACnC,UAAQ,IAAI,qBAAqB;AACnC;AAEA,SAAS,cAAc;AACvB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AAEP,SAAS,cAAc;AACvB,SAAS,oBAAoB;AAC7B,SAAS,wBAAwB;AAEjC,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAC5B,MAAM,cAAc;AACpB,MAAM,qBAAqB;AAI3B,MAAM,gBAAgB,OAAO,IAAI,SAAS;AAE1C,SAAS,YAAY,OAAiD;AACpE,MAAI,CAAC,MAAO,QAAO;AAEnB,QAAM,QAAQ,MACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AAEjB,SAAO,MAAM,SAAS,IAAI,QAAQ;AACpC;AAEA,SAAS,UAAU,OAA2B,UAA0B;AACtE,QAAM,SAAS,OAAO,SAAS,SAAS,IAAI,EAAE;AAC9C,MAAI,OAAO,SAAS,MAAM,KAAK,SAAS,GAAG;AACzC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,cAAsB;AAC7B,QAAM,gBAAgB,QAAQ,KAAK,UAAU,CAAC,QAAQ,QAAQ,QAAQ;AACtE,MAAI,iBAAiB,GAAG;AACtB,WAAO,UAAU,QAAQ,KAAK,gBAAgB,CAAC,GAAG,YAAY;AAAA,EAChE;AAEA,SAAO,UAAU,QAAQ,IAAI,MAAM,YAAY;AACjD;AAEA,SAAS,cAAsB;AAC7B,QAAM,eAAe,QAAQ,IAAI,MAAM,KAAK;AAC5C,MAAI,cAAc;AAChB,WAAO;AAAA,EACT;AAGA,MAAI,QAAQ,IAAI,MAAM,KAAK,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,UAAU,gBAA4D;AAC7E,MAAI,CAAC,kBAAkB,eAAe,WAAW,GAAG;AAClD,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,cAAc,CAAC,OAAO,QAAQ,QAAQ,OAAO,UAAU,SAAS;AAAA,IAChE,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,eAAe,CAAC,gBAAgB;AAAA,EAClC;AACF;AAEA,SAAS,mBAAyB;AAChC,SAAO,UAAU;AAAA,IACf,OAAO,QAAQ,IAAI,aAAa,eAAe,SAAS;AAAA,IACxD,QAAQ;AAAA,EACV,CAAC;AAED,QAAM,QAAQ,QAAQ,IAAI,OAAO,KAAK;AACtC,MAAI,UAAU,KAAK;AACjB,WAAO,SAAS,CAAC;AAAA,EACnB,WAAW,OAAO;AAChB,WAAO,SAAS,CAAC;AAAA,EACnB;AACF;AAEA,SAAS,gBAAgB,OAAe,SAAyB;AAC/D,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,EAAE;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,GAAG,OAAO,wDAAwD,KAAK,EAAE;AAAA,EAC3F;AACF;AAEA,SAAS,wBAA8C;AACrD,QAAM,kBAAkB,YAAY,QAAQ,IAAI,eAAe;AAC/D,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,WAAO,gBAAgB,IAAI,YAAU,gBAAgB,QAAQ,iBAAiB,CAAC;AAAA,EACjF;AAEA,SAAO;AACT;AAEA,SAAS,qBAGP;AACA,SAAO;AAAA,IACL,eAAe;AAAA,MACb,cAAc,IAAI,qBAAqB;AAAA,MACvC,eAAe,IAAI,sBAAsB;AAAA,IAC3C;AAAA,IACA,YAAY,CAAC;AAAA,EACf;AACF;AAEA,SAAS,mBAAmB,QAAmB,WAAmB;AAChE,QAAM,MAAM,aAAa;AACzB,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,MAAM,OAAO;AAAA,IACb,SAAS,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,gBAAgB,KAAK,OAAO,KAAK,IAAI,IAAI,aAAa,GAAI;AAAA,IAC1D,iBAAiB,OAAO,kBAAkB,EAAE;AAAA,IAC5C,gBAAgB,IAAI;AAAA,IACpB,kBAAkB,IAAI;AAAA,IACtB,wBAAwB,IAAI;AAAA,IAC5B,0BAA0B,IAAI;AAAA,IAC9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,oBAAoB,IAAI;AAAA,IACxB,sBAAsB,IAAI;AAAA,IAC1B,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,EACpC;AACF;AAEA,eAAe,OAAsB;AACnC,mBAAiB;AAEjB,QAAM,eAAe,QAAQ,IAAI,aAAa;AAC9C,QAAM,OAAO,YAAY;AACzB,QAAM,OAAO,YAAY;AACzB,QAAM,UAAU,QAAQ,IAAI,SAAS,KAAK,KAAK;AAC/C,QAAM,iBAAiB,sBAAsB;AAE7C,QAAM,EAAE,eAAe,WAAW,IAAI,mBAAmB;AAEzD,gBAAc,KAAK,YAAY,OAAO,IAAI,KAAK,OAAO,OAAO,EAAE;AAC/D,gBAAc,KAAK,0BAA0B,IAAI,IAAI,IAAI,EAAE;AAC3D,MAAI,kBAAkB,eAAe,SAAS,GAAG;AAC/C,kBAAc,KAAK,wCAAwC,eAAe,KAAK,IAAI,CAAC,EAAE;AAAA,EACxF,WAAW,cAAc;AACvB,QAAI,CAAC,SAAS;AACZ,oBAAc;AAAA,QACZ;AAAA,MAGF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,kBAAc;AAAA,MACZ;AAAA,IAEF;AAAA,EACF,OAAO;AACL,kBAAc,KAAK,gDAAgD;AAAA,EACrE;AAEA,QAAM,SAAS,IAAI,UAAU;AAAA,IAC3B,MAAM,OAAO;AAAA,IACb,OAAO;AAAA,IACP,SAAS,OAAO;AAAA,IAChB,aAAa,OAAO;AAAA,IACpB,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA,MAAM,UAAU,cAAc;AAAA,IAC9B;AAAA,IACA,GAAG;AAAA,EACL,CAAC;AAED,mBAAiB,MAAM;AASvB,MAAI;AAGF,UAAM,UAAU;AAChB,UAAM,WAAW,QAAQ,qBAAqB,KAAK,MAAM;AACzD,QAAI,UAAU;AACZ,cAAQ,sBAAsB,CAAC,cAA+B;AAC5D,cAAM,SAAS,SAAS,SAAS;AACjC,YAAI;AACF,gBAAM,MAAM,aAAa;AACzB,iBAAO,QAAQ,uBAAuB;AAAA,YACpC,cAAc;AAAA,cACZ,oBAAoB;AAAA,gBAClB,mBAAmB,IAAI;AAAA,gBACvB,qBAAqB,IAAI;AAAA,gBACzB,eACE,QAAQ,IAAI,aAAa,QAAQ,IAAI,wBAAwB;AAAA,gBAC/D,iBACE,QAAQ,IAAI,aAAa,Q
AAQ,IAAI,wBAAwB;AAAA,cACjE;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AACA,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF,SAAS,KAAK;AACZ,kBAAc,KAAK,2CAA2C,OAAO,GAAG,CAAC,EAAE;AAAA,EAC7E;AAEA,QAAM,YAAY,KAAK,IAAI;AAE3B,SAAO,IAAI,WAAW,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AAC1E,SAAO,IAAI,YAAY,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AAQ3E,QAAM,kBAAkB,WAAW,UAAU,IAAI,IAAI,IAAI;AACzD,SAAO;AAAA,IAAI;AAAA,IAAyC,CAAC,MACnD,EAAE,KAAK,EAAE,UAAU,gBAAgB,CAAC;AAAA,EACtC;AACA,SAAO;AAAA,IAAI;AAAA,IAA6C,CAAC,MACvD,EAAE,KAAK,EAAE,UAAU,GAAG,eAAe,OAAO,CAAC;AAAA,EAC/C;AAEA,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,KAAK;AAAA,MACL,aAAa;AAAA,MACb,UAAU;AAAA,IACZ;AAAA,IACA,YAAY,OAAO,mBAAmB,QAAQ,SAAS,CAAC;AAAA,EAC1D;AAEA,MAAI,iBAAiB;AAErB,iBAAe,SAAS,QAAgB,UAAiC;AACvE,QAAI,eAAgB;AACpB,qBAAiB;AAEjB,UAAM,YAAY,WAAW,MAAM;AACjC,oBAAc,MAAM,qBAAqB,mBAAmB,OAAO,MAAM,GAAG;AAC5E,cAAQ,KAAK,CAAC;AAAA,IAChB,GAAG,mBAAmB;AAEtB,QAAI;AACF,oBAAc,KAAK,6BAA6B,MAAM,EAAE;AACxD,YAAM,OAAO,MAAM;AAEnB,iBAAW,aAAa,YAAY;AAClC,cAAM,UAAU;AAAA,MAClB;AAEA,mBAAa,SAAS;AACtB,cAAQ,KAAK,QAAQ;AAAA,IACvB,SAAS,OAAO;AACd,mBAAa,SAAS;AACtB,YAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,oBAAc,MAAM,gCAAgC,OAAO,EAAE;AAC7D,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF;AAEA,UAAQ,GAAG,WAAW,MAAM;AAC1B,SAAK,SAAS,WAAW,CAAC;AAAA,EAC5B,CAAC;AAED,UAAQ,GAAG,UAAU,MAAM;AACzB,SAAK,SAAS,UAAU,CAAC;AAAA,EAC3B,CAAC;AAED,UAAQ,GAAG,qBAAqB,CAAC,UAAU;AACzC,kBAAc,MAAM,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AACzE,SAAK,SAAS,qBAAqB,CAAC;AAAA,EACtC,CAAC;AAED,UAAQ,GAAG,sBAAsB,CAAC,WAAW;AAC3C,kBAAc,MAAM,wBAAwB,OAAO,MAAM,CAAC,EAAE;AAC5D,SAAK,SAAS,sBAAsB,CAAC;AAAA,EACvC,CAAC;AAED,QAAM,OAAO,OAAO,IAAI;AAExB,gBAAc,KAAK,GAAG,OAAO,IAAI,KAAK,OAAO,OAAO,wBAAwB,IAAI,IAAI,IAAI,MAAM;AAChG;AAEA,KAAK,KAAK,EAAE,MAAM,CAAC,UAAU;AAC3B,QAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,gBAAc,MAAM,2BAA2B,OAAO,EAAE;AACxD,UAAQ,KAAK,CAAC;AAChB,CAAC;",
+ "sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack-http' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? '', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n // Distinguish \"never probed\" (checkedAt === null) from \"probed and failed\"\n // (checkedAt set, ok=false). The raw `lastPlannerOk` defaults to `false`\n // at startup, which would mislead operators into thinking the LLM is\n // broken before it has been exercised once.\n const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;\n const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? null : llm.lastExtractorOk;\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: plannerOkForHealth,\n llm_extractor_ok: extractorOkForHealth,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n // Counter surfacing lets operators diagnose gate behavior from outside\n // the process (see src/tools/start-research.ts for the gate semantics).\n consecutive_planner_failures: llm.consecutivePlannerFailures,\n consecutive_extractor_failures: llm.consecutiveExtractorFailures,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. 
' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model:\n process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? null,\n extractor_model:\n process.env.LLM_MODEL ?? process.env.LLM_EXTRACTION_MODEL ?? null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? 
`http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
+ "mappings": ";AAGA,IAAI,CAAC,QAAQ,IAAI,oBAAoB;AACnC,UAAQ,IAAI,qBAAqB;AACnC;AAEA,SAAS,cAAc;AACvB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AAEP,SAAS,cAAc;AACvB,SAAS,oBAAoB;AAC7B,SAAS,wBAAwB;AAEjC,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAC5B,MAAM,cAAc;AACpB,MAAM,qBAAqB;AAI3B,MAAM,gBAAgB,OAAO,IAAI,SAAS;AAE1C,SAAS,YAAY,OAAiD;AACpE,MAAI,CAAC,MAAO,QAAO;AAEnB,QAAM,QAAQ,MACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AAEjB,SAAO,MAAM,SAAS,IAAI,QAAQ;AACpC;AAEA,SAAS,UAAU,OAA2B,UAA0B;AACtE,QAAM,SAAS,OAAO,SAAS,SAAS,IAAI,EAAE;AAC9C,MAAI,OAAO,SAAS,MAAM,KAAK,SAAS,GAAG;AACzC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,cAAsB;AAC7B,QAAM,gBAAgB,QAAQ,KAAK,UAAU,CAAC,QAAQ,QAAQ,QAAQ;AACtE,MAAI,iBAAiB,GAAG;AACtB,WAAO,UAAU,QAAQ,KAAK,gBAAgB,CAAC,GAAG,YAAY;AAAA,EAChE;AAEA,SAAO,UAAU,QAAQ,IAAI,MAAM,YAAY;AACjD;AAEA,SAAS,cAAsB;AAC7B,QAAM,eAAe,QAAQ,IAAI,MAAM,KAAK;AAC5C,MAAI,cAAc;AAChB,WAAO;AAAA,EACT;AAGA,MAAI,QAAQ,IAAI,MAAM,KAAK,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,UAAU,gBAA4D;AAC7E,MAAI,CAAC,kBAAkB,eAAe,WAAW,GAAG;AAClD,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,cAAc,CAAC,OAAO,QAAQ,QAAQ,OAAO,UAAU,SAAS;AAAA,IAChE,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,eAAe,CAAC,gBAAgB;AAAA,EAClC;AACF;AAEA,SAAS,mBAAyB;AAChC,SAAO,UAAU;AAAA,IACf,OAAO,QAAQ,IAAI,aAAa,eAAe,SAAS;AAAA,IACxD,QAAQ;AAAA,EACV,CAAC;AAED,QAAM,QAAQ,QAAQ,IAAI,OAAO,KAAK;AACtC,MAAI,UAAU,KAAK;AACjB,WAAO,SAAS,CAAC;AAAA,EACnB,WAAW,OAAO;AAChB,WAAO,SAAS,CAAC;AAAA,EACnB;AACF;AAEA,SAAS,gBAAgB,OAAe,SAAyB;AAC/D,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,EAAE;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,GAAG,OAAO,wDAAwD,KAAK,EAAE;AAAA,EAC3F;AACF;AAEA,SAAS,wBAA8C;AACrD,QAAM,kBAAkB,YAAY,QAAQ,IAAI,eAAe;AAC/D,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,WAAO,gBAAgB,IAAI,YAAU,gBAAgB,QAAQ,iBAAiB,CAAC;AAAA,EACjF;AAEA,SAAO;AACT;AAEA,SAAS,qBAGP;AACA,SAAO;AAAA,IACL,eAAe;AAAA,MACb,cAAc,IAAI,qBAAqB;AAAA,MACvC,eAAe,IAAI,sBAAsB;AAAA,IAC3C;AAAA,IACA,YAAY,CAAC;AAAA,EACf;AACF;AAEA,SAAS,mBAAmB,QAAmB,WAAmB;AAChE,QAAM,MAAM,aAAa;AAKzB,QAAM,qBAAqB,IAAI,yBAAyB,OAAO,OAAO,IAAI;AAC1E,QAAM,uBAAuB,IAAI,2BAA2B,OAAO,OAAO,IAAI;AAC9E,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,MAAM,OAAO;AAAA,IACb,SAAS,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,gBAAgB,KAAK,OAAO,KAAK,IAAI,IAAI,aAAa,GAAI;AAAA,IAC1D,iBAAiB,OAAO,kBAAkB,EAAE;AAAA,IAC5C,gBAAgB;AAAA,IAChB,kBAAkB;AAAA,IAClB,wBAAwB,IAAI;AAAA,IAC5B,0BAA0B,IAAI;AAAA,IAC9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,oBAAoB,IAAI;AAAA,IACxB,sBAAsB,IAAI;AAAA;AAAA;AAAA,IAG1B,8BAA8B,IAAI;AAAA,IAClC,gCAAgC,IAAI;AAAA,IACpC,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,EACpC;AACF;AAEA,eAAe,OAAsB;AACnC,mBAAiB;AAEjB,QAAM,eAAe,QAAQ,IAAI,aAAa;AAC9C,QAAM,OAAO,YAAY;AACzB,QAAM,OAAO,YAAY;AACzB,QAAM,UAAU,QAAQ,IAAI,SAAS,KAAK,KAAK;AAC/C,QAAM,iBAAiB,sBAAsB;AAE7C,QAAM,EAAE,eAAe,WAAW,IAAI,mBAAmB;AAEzD,gBAAc,KAAK,YAAY,OAAO,IAAI,KAAK,OAAO,OAAO,EAAE;AAC/D,gBAAc,KAAK,0BAA0B,IAAI,IAAI,IAAI,EAAE;AAC3D,MAAI,kBAAkB,eAAe,SAAS,GAAG;AAC/C,kBAAc,KAAK,wCAAwC,eAAe,KAAK,IAAI,CAAC,EAAE;AAAA,EACxF,WAAW,cAAc;AACvB,QAAI,CAAC,SAAS;AACZ,oBAAc;AAAA,QACZ;AAAA,MAGF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,kBAAc;AAAA,MACZ;AAAA,IAEF;AAAA,EACF,OAAO;AACL,kBAAc,KAAK,gDAAgD;AAAA,EACrE;AAEA,QAAM,SAAS,IAAI,UAAU;AAAA,IAC3B,MAAM,OAAO;AAAA,IACb,OAAO;AAAA,IACP,SAAS,OAAO;AAAA,IAChB,aAAa,OAAO;AAAA,IACpB,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA,MAAM,UAAU,cAAc;AAAA,IAC9B;AAAA,IACA,GAAG;AAAA,EACL,CAAC;AAED,mBAAiB,MAAM;AASvB,MAAI;AAGF,UAAM,UAAU;AAChB,UAAM,WAAW,QAAQ,qBAAqB,KAAK,MAAM;AACzD,QAAI,UAAU;AACZ,cAAQ,sBAAsB,CAAC,cAA+B;AAC5D,cAAM,SAAS,SAAS,SAAS;AACjC,YAAI;AACF,gBAAM,MAAM,aAAa;AACzB,iBAAO,QAAQ,uBAAuB;AAAA,YACpC,cAAc;AAAA,cACZ,oB
AAoB;AAAA,gBAClB,mBAAmB,IAAI;AAAA,gBACvB,qBAAqB,IAAI;AAAA,gBACzB,eACE,QAAQ,IAAI,aAAa,QAAQ,IAAI,wBAAwB;AAAA,gBAC/D,iBACE,QAAQ,IAAI,aAAa,QAAQ,IAAI,wBAAwB;AAAA,cACjE;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AACA,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF,SAAS,KAAK;AACZ,kBAAc,KAAK,2CAA2C,OAAO,GAAG,CAAC,EAAE;AAAA,EAC7E;AAEA,QAAM,YAAY,KAAK,IAAI;AAE3B,SAAO,IAAI,WAAW,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AAC1E,SAAO,IAAI,YAAY,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AAQ3E,QAAM,kBAAkB,WAAW,UAAU,IAAI,IAAI,IAAI;AACzD,SAAO;AAAA,IAAI;AAAA,IAAyC,CAAC,MACnD,EAAE,KAAK,EAAE,UAAU,gBAAgB,CAAC;AAAA,EACtC;AACA,SAAO;AAAA,IAAI;AAAA,IAA6C,CAAC,MACvD,EAAE,KAAK,EAAE,UAAU,GAAG,eAAe,OAAO,CAAC;AAAA,EAC/C;AAEA,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,KAAK;AAAA,MACL,aAAa;AAAA,MACb,UAAU;AAAA,IACZ;AAAA,IACA,YAAY,OAAO,mBAAmB,QAAQ,SAAS,CAAC;AAAA,EAC1D;AAEA,MAAI,iBAAiB;AAErB,iBAAe,SAAS,QAAgB,UAAiC;AACvE,QAAI,eAAgB;AACpB,qBAAiB;AAEjB,UAAM,YAAY,WAAW,MAAM;AACjC,oBAAc,MAAM,qBAAqB,mBAAmB,OAAO,MAAM,GAAG;AAC5E,cAAQ,KAAK,CAAC;AAAA,IAChB,GAAG,mBAAmB;AAEtB,QAAI;AACF,oBAAc,KAAK,6BAA6B,MAAM,EAAE;AACxD,YAAM,OAAO,MAAM;AAEnB,iBAAW,aAAa,YAAY;AAClC,cAAM,UAAU;AAAA,MAClB;AAEA,mBAAa,SAAS;AACtB,cAAQ,KAAK,QAAQ;AAAA,IACvB,SAAS,OAAO;AACd,mBAAa,SAAS;AACtB,YAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,oBAAc,MAAM,gCAAgC,OAAO,EAAE;AAC7D,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF;AAEA,UAAQ,GAAG,WAAW,MAAM;AAC1B,SAAK,SAAS,WAAW,CAAC;AAAA,EAC5B,CAAC;AAED,UAAQ,GAAG,UAAU,MAAM;AACzB,SAAK,SAAS,UAAU,CAAC;AAAA,EAC3B,CAAC;AAED,UAAQ,GAAG,qBAAqB,CAAC,UAAU;AACzC,kBAAc,MAAM,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AACzE,SAAK,SAAS,qBAAqB,CAAC;AAAA,EACtC,CAAC;AAED,UAAQ,GAAG,sBAAsB,CAAC,WAAW;AAC3C,kBAAc,MAAM,wBAAwB,OAAO,MAAM,CAAC,EAAE;AAC5D,SAAK,SAAS,sBAAsB,CAAC;AAAA,EACvC,CAAC;AAED,QAAM,OAAO,OAAO,IAAI;AAExB,gBAAc,KAAK,GAAG,OAAO,IAAI,KAAK,OAAO,OAAO,wBAAwB,IAAI,IAAI,IAAI,MAAM;AAChG;AAEA,KAAK,KAAK,EAAE,MAAM,CAAC,UAAU;AAC3B,QAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,gBAAc,MAAM,2BAA2B,OAAO,EAAE;AACxD,UAAQ,KAAK,CAAC;AAChB,CAAC;",
   "names": []
 }
package/dist/mcp-use.json CHANGED
@@ -1,7 +1,7 @@
 {
   "includeInspector": false,
-  "buildTime": "2026-04-21T20:02:58.381Z",
-  "buildId": "2d035686c42d62f1",
+  "buildTime": "2026-04-21T22:15:35.136Z",
+  "buildId": "b3e71203214e510a",
   "entryPoint": "dist/index.js",
   "widgets": {}
 }
package/dist/src/schemas/scrape-links.js CHANGED
@@ -10,7 +10,9 @@ const scrapeLinksParamsSchema = z.object({
   )
 }).strict();
 const scrapeLinksOutputSchema = z.object({
-  content: z.string().describe("LLM-extracted content from scraped pages per the extract instructions."),
+  // `content` deliberately NOT duplicated here — the primary markdown lives in
+  // the MCP tool result's `content[0].text`. Previously this schema echoed the
+  // whole extraction output, doubling token cost for clients that forward both.
   metadata: z.object({
     total_items: z.number().int().nonnegative().describe("Number of URLs processed."),
     successful: z.number().int().nonnegative().describe("URLs fetched successfully."),
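Clients that previously read the extraction text from `structuredContent.content` must now take it from the primary text block instead. A minimal sketch of the post-6.0.3 read path, assuming a pre-connected `@modelcontextprotocol/sdk` client (connection setup omitted; the URLs and extract string are made up):

```ts
import type { Client } from "@modelcontextprotocol/sdk/client/index.js";

// Assumed: a pre-connected MCP client (transport setup omitted).
declare const client: Client;

async function readScrapeResult(): Promise<void> {
  const result = (await client.callTool({
    name: "scrape-links",
    arguments: {
      urls: ["https://example.com/post"],
      extract: "root cause | affected versions | fix",
    },
  })) as unknown as {
    content?: Array<{ type: string; text?: string }>;
    structuredContent?: { metadata?: { successful: number; failed: number } };
  };

  // The extraction markdown now lives only in the primary text block.
  const first = result.content?.[0];
  const markdown = first?.type === "text" ? (first.text ?? "") : "";

  // structuredContent carries just the metadata block from the schema above.
  const meta = result.structuredContent?.metadata;
  console.log(markdown);
  console.log(`fetched ${meta?.successful ?? 0}, failed ${meta?.failed ?? 0}`);
}
```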
package/dist/src/schemas/scrape-links.js.map CHANGED
@@ -1,7 +1,7 @@
 {
   "version": 3,
   "sources": ["../../../src/schemas/scrape-links.ts"],
- "sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .describe('URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch.'),\n extract: z\n .string()\n .min(1, { message: 'scrape-links: extract cannot be empty' })\n .describe(\n 'Semantic extraction instruction. Describe the SHAPE of what you want, separated by `|`. The extractor classifies each page (docs / github-thread / reddit / marketing / cve / paper / announcement / qa / blog / changelog / release-notes) and adjusts emphasis per type: preserves numbers/versions/stacktraces verbatim from docs and CVE pages, quotes Reddit/HN with attribution plus sentiment distribution, flags what the page did NOT answer in a \"Not found\" section, and surfaces referenced-but-unscraped URLs in a \"Follow-up signals\" bulletin that feeds the next research loop. Good examples: \"root cause | affected versions | fix | workarounds | timeline\"; \"pricing tiers | rate limits | enterprise contact | free-tier quotas\"; \"maintainer decisions | accepted fix commits | stacktraces | resolved version\".',\n ),\n}).strict();\n\nexport type ScrapeLinksParams = z.infer<typeof scrapeLinksParamsSchema>;\n\nexport const scrapeLinksOutputSchema = z.object({\n content: z\n .string()\n .describe('LLM-extracted content from scraped pages per the extract instructions.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('URLs fetched successfully.'),\n failed: z.number().int().nonnegative().describe('URLs that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n total_credits: z.number().int().nonnegative().describe('External scraping credits consumed.'),\n }).strict(),\n}).strict();\n\nexport type ScrapeLinksOutput = z.infer<typeof scrapeLinksOutputSchema>;\n"],
- "mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,icAA4b;AAAA,EACxc,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,SAAS,EACN,OAAO,EACP,SAAS,wEAAwE;AAAA,EACpF,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
+ "sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .describe('URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch.'),\n extract: z\n .string()\n .min(1, { message: 'scrape-links: extract cannot be empty' })\n .describe(\n 'Semantic extraction instruction. Describe the SHAPE of what you want, separated by `|`. The extractor classifies each page (docs / github-thread / reddit / marketing / cve / paper / announcement / qa / blog / changelog / release-notes) and adjusts emphasis per type: preserves numbers/versions/stacktraces verbatim from docs and CVE pages, quotes Reddit/HN with attribution plus sentiment distribution, flags what the page did NOT answer in a \"Not found\" section, and surfaces referenced-but-unscraped URLs in a \"Follow-up signals\" bulletin that feeds the next research loop. Good examples: \"root cause | affected versions | fix | workarounds | timeline\"; \"pricing tiers | rate limits | enterprise contact | free-tier quotas\"; \"maintainer decisions | accepted fix commits | stacktraces | resolved version\".',\n ),\n}).strict();\n\nexport type ScrapeLinksParams = z.infer<typeof scrapeLinksParamsSchema>;\n\nexport const scrapeLinksOutputSchema = z.object({\n // `content` deliberately NOT duplicated here \u2014 the primary markdown lives in\n // the MCP tool result's `content[0].text`. Previously this schema echoed the\n // whole extraction output, doubling token cost for clients that forward both.\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('URLs fetched successfully.'),\n failed: z.number().int().nonnegative().describe('URLs that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n total_credits: z.number().int().nonnegative().describe('External scraping credits consumed.'),\n }).strict(),\n}).strict();\n\nexport type ScrapeLinksOutput = z.infer<typeof scrapeLinksOutputSchema>;\n"],
+ "mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,icAA4b;AAAA,EACxc,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA,EAI9C,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
   "names": []
 }
package/dist/src/schemas/start-research.js CHANGED
@@ -7,9 +7,7 @@ const startResearchParamsSchema = z.object({
     "Include the full 3-tool research playbook (toolbelt overview, the loop, output discipline). Default false \u2014 when the LLM planner is offline the server emits a compact stub that already names the 3 tools and the loop. Pass true only if the agent needs the verbose tactic reference, or to override the degraded-mode shrink."
   )
 }).strict();
-const startResearchOutputSchema = z.object({
-  content: z.string().describe("Orientation markdown for the current research session.")
-}).strict();
+const startResearchOutputSchema = z.object({}).strict();
 export {
   startResearchOutputSchema,
   startResearchParamsSchema
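One consequence of the empty `.strict()` object is worth spelling out: any structured payload that still carries keys now fails validation, so nothing can silently reintroduce the duplicated `content` field. A small zod illustration; the schema line is copied from the diff, the parse calls are made up:

```ts
import { z } from "zod";

const startResearchOutputSchema = z.object({}).strict();

// An empty structured payload is the only accepted shape...
console.log(startResearchOutputSchema.safeParse({}).success); // true

// ...while the pre-6.0.3 shape is rejected, enforcing that the orientation
// markdown is read from content[0].text rather than structuredContent.
console.log(
  startResearchOutputSchema.safeParse({ content: "## Research brief" }).success,
); // false
```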
package/dist/src/schemas/start-research.js.map CHANGED
@@ -1,7 +1,7 @@
 {
   "version": 3,
   "sources": ["../../../src/schemas/start-research.ts"],
- "sourcesContent": ["import { z } from 'zod';\n\nexport const startResearchParamsSchema = z.object({\n goal: z\n .string()\n .min(1, { message: 'start-research: goal cannot be empty' })\n .optional()\n .describe(\n 'Research goal for this session. When provided AND the LLM planner (LLM_API_KEY) is available, the server returns a goal-tailored brief: classified goal type (spec | bug | migration | sentiment | pricing | security | synthesis | product_launch), a `primary_branch` recommendation (reddit for sentiment/migration; web for spec/bug/pricing; both when opinion-heavy AND needs official sources), the exact `first_call_sequence` of web-search + scrape-links calls to fire, 25\u201350 keyword seeds for the first `web-search` call, iteration hints, gaps to watch, and stop criteria. No goal \u2192 the generic 3-tool playbook (no tailored brief). Write the goal as you would to a human researcher \u2014 one or two sentences, specific about what \"done\" looks like.',\n ),\n include_playbook: z\n .boolean()\n .default(false)\n .describe(\n 'Include the full 3-tool research playbook (toolbelt overview, the loop, output discipline). Default false \u2014 when the LLM planner is offline the server emits a compact stub that already names the 3 tools and the loop. Pass true only if the agent needs the verbose tactic reference, or to override the degraded-mode shrink.',\n ),\n}).strict();\n\nexport const startResearchOutputSchema = z.object({\n content: z\n .string()\n .describe('Orientation markdown for the current research session.'),\n}).strict();\n\nexport type StartResearchParams = z.infer<typeof startResearchParamsSchema>;\nexport type StartResearchOutput = z.infer<typeof startResearchOutputSchema>;\n"],
- "mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EACH,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,uCAAuC,CAAC,EAC1D,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,kBAAkB,EACf,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAEH,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,SAAS,EACN,OAAO,EACP,SAAS,wDAAwD;AACtE,CAAC,EAAE,OAAO;",
+ "sourcesContent": ["import { z } from 'zod';\n\nexport const startResearchParamsSchema = z.object({\n goal: z\n .string()\n .min(1, { message: 'start-research: goal cannot be empty' })\n .optional()\n .describe(\n 'Research goal for this session. When provided AND the LLM planner (LLM_API_KEY) is available, the server returns a goal-tailored brief: classified goal type (spec | bug | migration | sentiment | pricing | security | synthesis | product_launch), a `primary_branch` recommendation (reddit for sentiment/migration; web for spec/bug/pricing; both when opinion-heavy AND needs official sources), the exact `first_call_sequence` of web-search + scrape-links calls to fire, 25\u201350 keyword seeds for the first `web-search` call, iteration hints, gaps to watch, and stop criteria. No goal \u2192 the generic 3-tool playbook (no tailored brief). Write the goal as you would to a human researcher \u2014 one or two sentences, specific about what \"done\" looks like.',\n ),\n include_playbook: z\n .boolean()\n .default(false)\n .describe(\n 'Include the full 3-tool research playbook (toolbelt overview, the loop, output discipline). Default false \u2014 when the LLM planner is offline the server emits a compact stub that already names the 3 tools and the loop. Pass true only if the agent needs the verbose tactic reference, or to override the degraded-mode shrink.',\n ),\n}).strict();\n\n// No output schema: `start-research` returns pure markdown via `content[0].text`.\n// There is nothing structured worth exposing (no per-row data, no metadata).\n// Clients read the primary text output; `structuredContent` is omitted entirely.\nexport const startResearchOutputSchema = z.object({}).strict();\n\nexport type StartResearchParams = z.infer<typeof startResearchParamsSchema>;\nexport type StartResearchOutput = z.infer<typeof startResearchOutputSchema>;\n"],
+ "mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EACH,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,uCAAuC,CAAC,EAC1D,SAAS,EACT;AAAA,IACC;AAAA,EACF;AAAA,EACF,kBAAkB,EACf,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAKH,MAAM,4BAA4B,EAAE,OAAO,CAAC,CAAC,EAAE,OAAO;",
   "names": []
 }
package/dist/src/schemas/web-search.js CHANGED
@@ -17,7 +17,10 @@ const webSearchParamsSchema = z.object({
   )
 }).strict();
 const webSearchOutputSchema = z.object({
-  content: z.string().describe("Markdown report with tiered results (LLM mode) or ranked URL list (raw mode)."),
+  // `content` deliberately NOT duplicated here — the primary markdown lives in
+  // the MCP tool result's `content[0].text`. Previously this schema echoed the
+  // whole markdown under `structuredContent.content`, doubling token cost for
+  // clients that forward both fields to an LLM.
   results: z.array(z.object({
     rank: z.number().int().positive().describe("1-based rank in the merged ranking."),
     url: z.string().describe("Result URL."),
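Unlike start-research, web-search keeps a machine-readable payload: the `results` rows and `metadata` survive in `structuredContent`, and only the duplicated markdown is dropped. A sketch of consuming just the structured rows, under the same assumed pre-connected client as above; the consensus filter threshold is arbitrary:

```ts
import type { Client } from "@modelcontextprotocol/sdk/client/index.js";

declare const client: Client; // assumed pre-connected, as in the sketch above

type ResultRow = { rank: number; url: string; title: string; seen_in: number };

async function topConsensusHits(): Promise<string[]> {
  const res = (await client.callTool({
    name: "web-search",
    arguments: {
      queries: ["mcp oauth 2.1 typescript"],
      extract: "OAuth 2.1 support in TypeScript MCP frameworks",
    },
  })) as unknown as { structuredContent?: { results?: ResultRow[] } };

  return (res.structuredContent?.results ?? [])
    .filter((row) => row.seen_in > 1) // keep URLs that multiple queries agreed on
    .map((row) => `${row.rank}. ${row.title} (${row.url})`);
}
```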
package/dist/src/schemas/web-search.js.map CHANGED
@@ -1,7 +1,7 @@
 {
   "version": 3,
   "sources": ["../../../src/schemas/web-search.ts"],
- "sourcesContent": ["import { z } from 'zod';\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .describe('A single Google search query. Each query runs as a separate parallel search. Use operators (site:, quotes, verbatim version numbers) to sharpen retrieval.'),\n )\n .min(1, { message: 'web-search: At least 1 query required' })\n .describe(\n 'Search queries to run in parallel via Google. Think of these as **concept groups** \u2014 clusters of semantically distinct facets of your research goal, each probing a DIFFERENT angle (official spec, implementation, failures, comparison, sentiment, changelog, CVE, pricing). Fire all groups in ONE call as a flat array. Overlapping queries waste budget; orthogonal facets multiply coverage. A narrow bug needs 10\u201320 queries across 2\u20133 facets; a comparison needs 25\u201335 across 4\u20136 facets; open-ended synthesis needs 40\u201380 across 8+ facets.',\n ),\n extract: z\n .string()\n .min(1, { message: 'web-search: extract cannot be empty' })\n .describe(\n 'Semantic instruction for the relevance classifier \u2014 what \"relevant\" means for THIS goal. Drives tiering (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER), synthesis, gap analysis, and refine-query suggestions. Be specific: \"OAuth 2.1 support in TypeScript MCP frameworks \u2014 runnable code, not marketing\", not \"MCP OAuth\". The classifier uses this to choose a source-of-truth rubric (vendor_doc for spec, github for bugs, reddit/blog for migration/sentiment, cve_databases for security).',\n ),\n raw: z\n .boolean()\n .default(false)\n .describe('Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.'),\n scope: z\n .enum(['web', 'reddit', 'both'])\n .default('web')\n .describe(\n 'Search scope. \"web\" (default) = open web, no augmentation. \"reddit\" = server appends `site:reddit.com` to every query and filters results to post permalinks (`/r/.+/comments/[a-z0-9]+/`); subreddit homepages are dropped. \"both\" = runs every query twice (open web + reddit-scoped), merges the result set, and tags each row with its source. Use \"reddit\" for sentiment/migration/lived-experience research; use \"both\" when you want one call to cover both branches.',\n ),\n verbose: z\n .boolean()\n .default(false)\n .describe(\n 'Include the per-row scoring/coverage metadata, the trailing Signals block, and the CONSENSUS labels even when they carry little signal (single-query hits, threshold of 1). Default false \u2014 most agents do not need this and it costs ~1.5KB per call on a typical 3-query fan-out.',\n ),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n content: z\n .string()\n .describe('Markdown report with tiered results (LLM mode) or ranked URL list (raw mode).'),\n results: z\n .array(z.object({\n rank: z.number().int().positive().describe('1-based rank in the merged ranking.'),\n url: z.string().describe('Result URL.'),\n title: z.string().describe('Page title from the result.'),\n snippet: z.string().describe('Search snippet from the result.'),\n source_type: z\n .enum(['reddit', 'github', 'docs', 'blog', 'paper', 'qa', 'cve', 'news', 'video', 'web'])\n .describe(\n 'Heuristic source kind from the URL. 
When the LLM classifier is online its tag overrides this.',\n ),\n score: z.number().describe('Composite CTR-weighted score, normalized to 100.'),\n seen_in: z.number().int().nonnegative().describe('Number of input queries this URL appeared in.'),\n best_position: z.number().int().nonnegative().describe('Best (lowest) SERP position observed.'),\n }))\n .optional()\n .describe('Per-result structured payload \u2014 same data the markdown table renders, machine-readable.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n llm_classified: z.boolean().describe('Whether LLM classification was applied.'),\n llm_error: z.string().optional().describe('LLM error if classification failed and fell back to raw.'),\n scope: z.enum(['web', 'reddit', 'both']).optional().describe('Search scope used.'),\n coverage_summary: z\n .array(z.object({\n query: z.string().describe('The search query.'),\n result_count: z.number().int().nonnegative().describe('Results returned for this query.'),\n top_url: z.string().optional().describe('Domain of the top result.'),\n }))\n .optional()\n .describe('Per-query result counts and top URLs.'),\n low_yield_queries: z\n .array(z.string())\n .optional()\n .describe('Queries that produced 0-1 results.'),\n }).strict(),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
- "mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD,SAAS,4JAA4J;AAAA,EAC1K,EACC,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,sCAAsC,CAAC,EACzD;AAAA,IACC;AAAA,EACF;AAAA,EACF,KAAK,EACF,QAAQ,EACR,QAAQ,KAAK,EACb,SAAS,oGAAoG;AAAA,EAChH,OAAO,EACJ,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAC9B,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN,OAAO,EACP,SAAS,+EAA+E;AAAA,EAC3F,SAAS,EACN,MAAM,EAAE,OAAO;AAAA,IACd,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,qCAAqC;AAAA,IAChF,KAAK,EAAE,OAAO,EAAE,SAAS,aAAa;AAAA,IACtC,OAAO,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IACxD,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,IAC9D,aAAa,EACV,KAAK,CAAC,UAAU,UAAU,QAAQ,QAAQ,SAAS,MAAM,OAAO,QAAQ,SAAS,KAAK,CAAC,EACvF;AAAA,MACC;AAAA,IACF;AAAA,IACF,OAAO,EAAE,OAAO,EAAE,SAAS,kDAAkD;AAAA,IAC7E,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,+CAA+C;AAAA,IAChG,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,uCAAuC;AAAA,EAChG,CAAC,CAAC,EACD,SAAS,EACT,SAAS,8FAAyF;AAAA,EACrG,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,gBAAgB,EAAE,QAAQ,EAAE,SAAS,yCAAyC;AAAA,IAC9E,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0DAA0D;AAAA,IACpG,OAAO,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS,EAAE,SAAS,oBAAoB;AAAA,IACjF,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,OAAO,EAAE,OAAO,EAAE,SAAS,mBAAmB;AAAA,MAC9C,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,MACxF,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,2BAA2B;AAAA,IACrE,CAAC,CAAC,EACD,SAAS,EACT,SAAS,uCAAuC;AAAA,IACnD,mBAAmB,EAChB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,oCAAoC;AAAA,EAClD,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
+ "sourcesContent": ["import { z } from 'zod';\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .describe('A single Google search query. Each query runs as a separate parallel search. Use operators (site:, quotes, verbatim version numbers) to sharpen retrieval.'),\n )\n .min(1, { message: 'web-search: At least 1 query required' })\n .describe(\n 'Search queries to run in parallel via Google. Think of these as **concept groups** \u2014 clusters of semantically distinct facets of your research goal, each probing a DIFFERENT angle (official spec, implementation, failures, comparison, sentiment, changelog, CVE, pricing). Fire all groups in ONE call as a flat array. Overlapping queries waste budget; orthogonal facets multiply coverage. A narrow bug needs 10\u201320 queries across 2\u20133 facets; a comparison needs 25\u201335 across 4\u20136 facets; open-ended synthesis needs 40\u201380 across 8+ facets.',\n ),\n extract: z\n .string()\n .min(1, { message: 'web-search: extract cannot be empty' })\n .describe(\n 'Semantic instruction for the relevance classifier \u2014 what \"relevant\" means for THIS goal. Drives tiering (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER), synthesis, gap analysis, and refine-query suggestions. Be specific: \"OAuth 2.1 support in TypeScript MCP frameworks \u2014 runnable code, not marketing\", not \"MCP OAuth\". The classifier uses this to choose a source-of-truth rubric (vendor_doc for spec, github for bugs, reddit/blog for migration/sentiment, cve_databases for security).',\n ),\n raw: z\n .boolean()\n .default(false)\n .describe('Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.'),\n scope: z\n .enum(['web', 'reddit', 'both'])\n .default('web')\n .describe(\n 'Search scope. \"web\" (default) = open web, no augmentation. \"reddit\" = server appends `site:reddit.com` to every query and filters results to post permalinks (`/r/.+/comments/[a-z0-9]+/`); subreddit homepages are dropped. \"both\" = runs every query twice (open web + reddit-scoped), merges the result set, and tags each row with its source. Use \"reddit\" for sentiment/migration/lived-experience research; use \"both\" when you want one call to cover both branches.',\n ),\n verbose: z\n .boolean()\n .default(false)\n .describe(\n 'Include the per-row scoring/coverage metadata, the trailing Signals block, and the CONSENSUS labels even when they carry little signal (single-query hits, threshold of 1). Default false \u2014 most agents do not need this and it costs ~1.5KB per call on a typical 3-query fan-out.',\n ),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n // `content` deliberately NOT duplicated here \u2014 the primary markdown lives in\n // the MCP tool result's `content[0].text`. 
Previously this schema echoed the\n // whole markdown under `structuredContent.content`, doubling token cost for\n // clients that forward both fields to an LLM.\n results: z\n .array(z.object({\n rank: z.number().int().positive().describe('1-based rank in the merged ranking.'),\n url: z.string().describe('Result URL.'),\n title: z.string().describe('Page title from the result.'),\n snippet: z.string().describe('Search snippet from the result.'),\n source_type: z\n .enum(['reddit', 'github', 'docs', 'blog', 'paper', 'qa', 'cve', 'news', 'video', 'web'])\n .describe(\n 'Heuristic source kind from the URL. When the LLM classifier is online its tag overrides this.',\n ),\n score: z.number().describe('Composite CTR-weighted score, normalized to 100.'),\n seen_in: z.number().int().nonnegative().describe('Number of input queries this URL appeared in.'),\n best_position: z.number().int().nonnegative().describe('Best (lowest) SERP position observed.'),\n }))\n .optional()\n .describe('Per-result structured payload \u2014 same data the markdown table renders, machine-readable.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n llm_classified: z.boolean().describe('Whether LLM classification was applied.'),\n llm_error: z.string().optional().describe('LLM error if classification failed and fell back to raw.'),\n scope: z.enum(['web', 'reddit', 'both']).optional().describe('Search scope used.'),\n coverage_summary: z\n .array(z.object({\n query: z.string().describe('The search query.'),\n result_count: z.number().int().nonnegative().describe('Results returned for this query.'),\n top_url: z.string().optional().describe('Domain of the top result.'),\n }))\n .optional()\n .describe('Per-query result counts and top URLs.'),\n low_yield_queries: z\n .array(z.string())\n .optional()\n .describe('Queries that produced 0-1 results.'),\n }).strict(),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
5
+ "mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD,SAAS,4JAA4J;AAAA,EAC1K,EACC,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,sCAAsC,CAAC,EACzD;AAAA,IACC;AAAA,EACF;AAAA,EACF,KAAK,EACF,QAAQ,EACR,QAAQ,KAAK,EACb,SAAS,oGAAoG;AAAA,EAChH,OAAO,EACJ,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAC9B,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA,EAK5C,SAAS,EACN,MAAM,EAAE,OAAO;AAAA,IACd,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,qCAAqC;AAAA,IAChF,KAAK,EAAE,OAAO,EAAE,SAAS,aAAa;AAAA,IACtC,OAAO,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IACxD,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,IAC9D,aAAa,EACV,KAAK,CAAC,UAAU,UAAU,QAAQ,QAAQ,SAAS,MAAM,OAAO,QAAQ,SAAS,KAAK,CAAC,EACvF;AAAA,MACC;AAAA,IACF;AAAA,IACF,OAAO,EAAE,OAAO,EAAE,SAAS,kDAAkD;AAAA,IAC7E,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,+CAA+C;AAAA,IAChG,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,uCAAuC;AAAA,EAChG,CAAC,CAAC,EACD,SAAS,EACT,SAAS,8FAAyF;AAAA,EACrG,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,gBAAgB,EAAE,QAAQ,EAAE,SAAS,yCAAyC;AAAA,IAC9E,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0DAA0D;AAAA,IACpG,OAAO,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS,EAAE,SAAS,oBAAoB;AAAA,IACjF,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,OAAO,EAAE,OAAO,EAAE,SAAS,mBAAmB;AAAA,MAC9C,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,MACxF,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,2BAA2B;AAAA,IACrE,CAAC,CAAC,EACD,SAAS,EACT,SAAS,uCAAuC;AAAA,IACnD,mBAAmB,EAChB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,oCAAoC;AAAA,EAClD,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
6
6
  "names": []
7
7
  }
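The comment embedded in the new sourcesContent above states the rationale for this schema change: the markdown report now lives only in the tool result's `content[0].text`, while `structuredContent` carries just the machine-readable `results` rows plus `metadata` instead of echoing the whole markdown a second time. A minimal consumer sketch in TypeScript; the result shape and field names below are inferred from `webSearchOutputSchema` in this map, not taken from a published client API:

// Sketch only: splits a web-search tool result into its two channels
// after the de-duplication. All types here are assumptions inferred
// from webSearchOutputSchema above.
interface WebSearchRow {
  rank: number;
  url: string;
  title: string;
  snippet: string;
  source_type: string;
  score: number;
  seen_in: number;
  best_position: number;
}

interface WebSearchToolResult {
  content: Array<{ type: 'text'; text: string }>;
  structuredContent?: {
    results?: WebSearchRow[];
    metadata: { total_items: number; successful: number; failed: number };
  };
}

function splitWebSearchResult(res: WebSearchToolResult): { markdown: string; rows: WebSearchRow[] } {
  // The human-readable report is now only in content[0].text ...
  const markdown = res.content[0]?.text ?? '';
  // ... and the machine-readable rows only in structuredContent.results.
  const rows = res.structuredContent?.results ?? [];
  return { markdown, rows };
}

A client that previously read the report from `structuredContent.content` must switch to `content[0].text`; forwarding both fields to an LLM no longer doubles the token cost.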
package/dist/src/services/llm-processor.js CHANGED
@@ -598,6 +598,7 @@ keyword_seeds:
598
598
  - Use operators where helpful (site:, quotes, verbatim version numbers).
599
599
  - DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.
600
600
  - Do NOT invent vendor names you are uncertain exist.
601
+ - For \`site:<domain>\` filters, ONLY use domains you are highly confident are real. Safe choices: \`github.com\`, \`stackoverflow.com\`, \`reddit.com\`, \`news.ycombinator.com\`, \`arxiv.org\`, \`nvd.nist.gov\`, \`pypi.org\`, \`npmjs.com\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names "Cursor" \u2192 \`cursor.com\`/\`docs.cursor.com\` is acceptable). If you don't know the product's real docs domain, leave the query open (no \`site:\`) instead of guessing.
601
602
 
602
603
  freshness_window:
603
604
  - If the goal mentions a recent release / date / version, use "days" or "weeks".
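The rule added in this hunk restricts `site:` operators in generated keyword seeds to domains the planner can be confident are real, falling back to open queries otherwise. Below is a sketch of the same allowlist enforced in code; the helper, its name, and the default set merely mirror the rule text above and are illustrative, not part of the package:

// Illustrative guard: drop site: filters whose domain is neither on the
// safe list nor explicitly named in the research goal, leaving the query
// open instead of guessing (the behavior the prompt rule asks for).
const SAFE_SITE_DOMAINS: ReadonlySet<string> = new Set([
  'github.com',
  'stackoverflow.com',
  'reddit.com',
  'news.ycombinator.com',
  'arxiv.org',
  'nvd.nist.gov',
  'pypi.org',
  'npmjs.com',
]);

function sanitizeSeedQuery(query: string, goalDomains: ReadonlySet<string> = new Set()): string {
  return query
    .replace(/site:(\S+)\s*/gi, (match, domain: string) => {
      const normalized = domain.toLowerCase().replace(/^www\./, '');
      // Keep trusted domains and domains spelled out in the goal itself.
      return SAFE_SITE_DOMAINS.has(normalized) || goalDomains.has(normalized) ? match : '';
    })
    .trim();
}

// sanitizeSeedQuery('cursor sandbox flags site:docs.cursor.com', new Set(['docs.cursor.com']))
//   keeps the filter; sanitizeSeedQuery('foo site:made-up-vendor.dev') returns 'foo'.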
package/dist/src/services/llm-processor.js.map CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../../../src/services/llm-processor.ts"],
4
- "sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses OpenRouter via OPENROUTER_API_KEY for AI-powered content filtering\n * Implements robust retry logic and NEVER throws\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~25k tokens) */\nconst MAX_LLM_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 120_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 15_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 30_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. */\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 
'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. */\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n// OpenRouter/OpenAI specific retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n const requestBody: Record<string, unknown> = {\n model,\n messages: [{ role: 'user', content: prompt }],\n };\n\n if (LLM_EXTRACTION.REASONING_EFFORT !== 'none') {\n requestBody.reasoning_effort = LLM_EXTRACTION.REASONING_EFFORT;\n }\n\n return requestBody;\n}\n\nexport async function requestText(\n processor: 
OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if (error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from OpenAI/OpenRouter\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? 
record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_EXTRACTION_API_KEY or OPENROUTER_API_KEY not set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to avoid token limits\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). 
Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) 
\"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Content clearly failed to load: return ONLY a single line, choosing from:\n \\`## Matches\\\\n_Page did not load: 404_\\`\n \\`## Matches\\\\n_Page did not load: login-wall_\\`\n \\`## Matches\\\\n_Page did not load: paywall_\\`\n \\`## Matches\\\\n_Page did not load: JS-render-empty_\\`\n \\`## Matches\\\\n_Page did not load: non-text-asset_\\`\n \\`## Matches\\\\n_Page did not load: truncated-before-relevant-section_\\`\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Retry loop\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(\n processor,\n prompt,\n 'LLM extraction',\n signal,\n );\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response - not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n\n // Log the error\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? String((err as Record<string, unknown>).code)\n : undefined;\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}]`, 'llm');\n\n // Check if we should retry\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n\n // Non-retryable or max retries reached\n break;\n }\n }\n\n // All attempts failed - return original content with error info\n const errorMessage = lastError?.message || 'Unknown LLM error';\n mcpLog('error', `All attempts failed: ${errorMessage}. 
Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content, // Return original content as fallback\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. 
Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. 
Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestText(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. 
Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestText(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly string[];\n}\n\nconst VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, 
unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. 
product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestText(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. \\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. 
Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
5
- "mappings": "AAMA,OAAO,YAAY;AACnB,SAAS,gBAAgB,uBAAuB;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,cAAc;AAGvB,MAAM,sBAAsB;AAG5B,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB;AAG9B,MAAM,uBAAuB;AAG7B,MAAM,0BAA0B;AAuBhC,MAAM,YAAY;AAAA,EAChB,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,sBAAsB;AAAA,EACtB,wBAAwB;AAAA,EACxB,kBAAkB;AAAA,EAClB,oBAAoB;AAAA,EACpB,4BAA4B;AAAA,EAC5B,8BAA8B;AAChC;AAEO,SAAS,eAAe,MAA2B;AACxD,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,6BAA6B;AAAA,EACzC,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,+BAA+B;AAAA,EAC3C;AACF;AAEO,SAAS,eAAe,MAAqB,KAAoB;AACtE,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,OAAO,eAAe;AAClF,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,8BAA8B;AAAA,EAC1C,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,gCAAgC;AAAA,EAC5C;AACF;AAEO,SAAS,eAAkC;AAChD,QAAM,MAAM,gBAAgB;AAC5B,SAAO;AAAA,IACL,eAAe,UAAU;AAAA,IACzB,iBAAiB,UAAU;AAAA,IAC3B,sBAAsB,UAAU;AAAA,IAChC,wBAAwB,UAAU;AAAA,IAClC,kBAAkB,UAAU;AAAA,IAC5B,oBAAoB,UAAU;AAAA;AAAA;AAAA,IAG9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,4BAA4B,UAAU;AAAA,IACtC,8BAA8B,UAAU;AAAA,EAC1C;AACF;AAGO,SAAS,0BAAgC;AAC9C,YAAU,gBAAgB;AAC1B,YAAU,kBAAkB;AAC5B,YAAU,uBAAuB;AACjC,YAAU,yBAAyB;AACnC,YAAU,mBAAmB;AAC7B,YAAU,qBAAqB;AAC/B,YAAU,6BAA6B;AACvC,YAAU,+BAA+B;AAC3C;AAgBA,MAAM,mBAAmB;AAAA,EACvB,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,YAAY;AACd;AAGA,MAAM,4BAA4B,oBAAI,IAAI;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGD,SAAS,UAAU,OAA6C;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,YAAY,SACZ,OAAQ,MAAkC,WAAW;AAEzD;AAEA,IAAI,YAA2B;AAIxB,SAAS,qBAAoC;AAClD,MAAI,CAAC,gBAAgB,EAAE,cAAe,QAAO;AAE7C,MAAI,CAAC,WAAW;AACd,gBAAY,IAAI,OAAO;AAAA,MACrB,SAAS,eAAe;AAAA,MACxB,QAAQ,eAAe;AAAA,MACvB,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,gBAAgB,EAAE,WAAW,yBAAyB;AAAA,IACxD,CAAC;AACD,WAAO,QAAQ,qCAAqC,eAAe,KAAK,cAAc,eAAe,QAAQ,KAAK,KAAK;AAAA,EACzH;AACA,SAAO;AACT;AAEA,SAAS,qBAAqB,OAAe,QAAyC;AACpF,QAAM,cAAuC;AAAA,IAC3C;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,EAC9C;AAEA,MAAI,eAAe,qBAAqB,QAAQ;AAC9C,gBAAY,mBAAmB,eAAe;AAAA,EAChD;AAEA,SAAO;AACT;AAEA,eAAsB,YACpB,WACA,QACA,gBACA,QACoE;AACpE,QAAM,QAAQ,eAAe;AAE7B,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB,CAAC,gBAAgB,UAAU,KAAK,YAAY;AAAA,QAC1C,qBAAqB,OAAO,MAAM;AAAA,QAClC;AAAA,UACE,QAAQ,SAAS,YAAY,IAAI,CAAC,aAAa,MAAM,CAAC,IAAI;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG,cAAc,KAAK,KAAK;AAAA,IAC7B;AAEA,UAAM,UAAU,SAAS,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK;AAC9D,QAAI,SAAS;AACX,aAAO,EAAE,SAAS,MAAM;AAAA,IAC1B;AAEA,UAAM,MAAM,6BAA6B,KAAK;AAC9C,WAAO,WAAW,GAAG,cAAc,qCAAqC,KAAK,IAAI,KAAK;AACtF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,IAAI;AAAA,EAC5C,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,GAAG,cAAc,qBAAqB,KAAK,KAAK,OAAO,IAAI,KAAK;AAClF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,QAAQ;AAAA,EAChD;AACF;AAKA,SAAS,oBAAoB,OAAyB;AACpD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAGhD,QAAM,YAAa,OAA6B;AAChD,MAAI,cAAc,cAAc,cAAc,aAAa;AACzD,WAAO;AAAA,EACT;AAGA,MAAI,UAAU,KAAK,GAAG;AACpB,QAAI,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,KAAK;AACxH,aAAO;AAAA,IACT;AAAA,EACF;AAGA,QAAM,SAAS;AACf,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AACN,QAAM,YACJ,SACC,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO,YAC1D,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,MAAI,aAAa,0BAA0B,IAAI,SAAS,GAAG;AACzD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,OAAO,OAAO,YAAY,WAAW,OAAO,QAAQ,YAAY,IAAI;AACpF,MACE,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,SAAS,KAC1B,QAA
Q,SAAS,WAAW,KAC5B,QAAQ,SAAS,qBAAqB,KACtC,QAAQ,SAAS,cAAc,KAC/B,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,YAAY,GAC7B;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAKA,SAAS,oBAAoB,SAAyB;AACpD,QAAM,mBAAmB,iBAAiB,cAAc,KAAK,IAAI,GAAG,OAAO;AAC3E,QAAM,SAAS,KAAK,OAAO,IAAI,wBAAwB;AACvD,SAAO,KAAK,IAAI,mBAAmB,QAAQ,iBAAiB,UAAU;AACxE;AAOA,eAAsB,sBACpB,SACA,QACA,WACA,QACoB;AAEpB,MAAI,CAAC,OAAO,SAAS;AACnB,WAAO,EAAE,SAAS,WAAW,MAAM;AAAA,EACrC;AAEA,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL;AAAA,MACA,WAAW;AAAA,MACX,OAAO;AAAA,MACP,cAAc;AAAA,QACZ,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO,EAAE,SAAS,WAAW,IAAI,WAAW,OAAO,OAAO,yBAAyB;AAAA,EACrF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,sBACtC,QAAQ,UAAU,GAAG,mBAAmB,IAAI,0CAC5C;AAKJ,QAAM,WAAW,MAAM;AACrB,QAAI,CAAC,OAAO,IAAK,QAAO;AACxB,QAAI;AACF,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,aAAO,GAAG,EAAE,MAAM,GAAG,EAAE,QAAQ;AAAA,IACjC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF,GAAG;AACH,QAAM,UAAU,UAAU,aAAa,OAAO;AAAA;AAAA,IAAS;AAEvD,QAAM,SAAS,OAAO,UAClB;AAAA;AAAA,EAEJ,OAAO,2BAA2B,OAAO,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAsDhD,gBAAgB,KACZ;AAAA;AAAA,EAEJ,OAAO;AAAA,EACP,gBAAgB;AAEhB,MAAI;AAGJ,WAAS,UAAU,GAAG,WAAW,iBAAiB,YAAY,WAAW;AACvE,QAAI;AACF,UAAI,YAAY,GAAG;AACjB,eAAO,QAAQ,4BAA4B,eAAe,KAAK,IAAI,KAAK;AAAA,MAC1E,OAAO;AACL,eAAO,WAAW,iBAAiB,OAAO,IAAI,iBAAiB,UAAU,IAAI,KAAK;AAAA,MACpF;AAEA,YAAM,WAAW,MAAM;AAAA,QACrB;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,UAAI,SAAS,SAAS;AACpB,eAAO,QAAQ,0BAA0B,SAAS,QAAQ,MAAM,eAAe,KAAK;AACpF,uBAAe,WAAW;AAC1B,eAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,MACtD;AAGA,aAAO,WAAW,oCAAoC,KAAK;AAC3D,qBAAe,aAAa,6BAA6B;AACzD,aAAO;AAAA,QACL;AAAA,QACA,WAAW;AAAA,QACX,OAAO;AAAA,QACP,cAAc;AAAA,UACZ,MAAM,UAAU;AAAA,UAChB,SAAS;AAAA,UACT,WAAW;AAAA,QACb;AAAA,MACF;AAAA,IAEF,SAAS,KAAc;AACrB,kBAAY,cAAc,GAAG;AAG7B,YAAM,SAAS,UAAU,GAAG,IAAI,IAAI,SAAS;AAC7C,YAAM,OAAO,OAAO,QAAQ,YAAY,QAAQ,QAAQ,UAAU,MAC9D,OAAQ,IAAgC,IAAI,IAC5C;AACJ,aAAO,SAAS,kBAAkB,UAAU,CAAC,MAAM,UAAU,OAAO,YAAY,MAAM,UAAU,IAAI,eAAe,oBAAoB,GAAG,CAAC,KAAK,KAAK;AAGrJ,UAAI,oBAAoB,GAAG,KAAK,UAAU,iBAAiB,YAAY;AACrE,cAAM,UAAU,oBAAoB,OAAO;AAC3C,eAAO,WAAW,eAAe,OAAO,SAAS,KAAK;AACtD,YAAI;AAAE,gBAAM,MAAM,SAAS,MAAM;AAAA,QAAG,QAAQ;AAAE;AAAA,QAAO;AACrD;AAAA,MACF;AAGA;AAAA,IACF;AAAA,EACF;AAGA,QAAM,eAAe,WAAW,WAAW;AAC3C,SAAO,SAAS,wBAAwB,YAAY,iCAAiC,KAAK;AAC1F,iBAAe,aAAa,YAAY;AAExC,SAAO;AAAA,IACL;AAAA;AAAA,IACA,WAAW;AAAA,IACX,OAAO,0BAA0B,YAAY;AAAA,IAC7C,cAAc,aAAa;AAAA,MACzB,MAAM,UAAU;AAAA,MAChB,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAOA,MAAM,0BAA0B;AA2ChC,eAAsB,sBACpB,YAQA,WACA,cACA,WACA,kBAAqC,CAAC,GAC4B;AAClE,QAAM,iBAAiB,WAAW,MAAM,GAAG,uBAAuB;AAMlE,QAAM,iBAAiB,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AACxD,QAAM,gBAAgB,CAAC,SAAyB,eAAe,OAAO,CAAC,KAAK;AAG5E,QAAM,QAAkB,CAAC;AACzB,aAAW,OAAO,gBAAgB;AAChC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,UAAM,UAAU,IAAI,QAAQ,SAAS,MACjC,IAAI,QAAQ,MAAM,GAAG,GAAG,IAAI,QAC5B,IAAI;AACR,UAAM,KAAK,IAAI,IAAI,IAAI,OAAO,cAAc,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,WAAM,MAAM,WAAM,OAAO,EAAE;AAAA,EAC/F;AAEA,QAAM,mBAAmB,gBAAgB,SAAS,IAC9C,gBAAgB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC9C;AACJ,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA,aAEJ,SAAS;AAAA,SACb,KAAK;AAAA;AAAA;AAAA,EAGZ,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;A
AAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iBAmDD,eAAe,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAMpB,eAAe,MAAM,cAAc,YAAY;AAAA,EAC/D,MAAM,KAAK,IAAI,CAAC;AAEhB,MAAI;AACF,WAAO,QAAQ,eAAe,eAAe,MAAM,2BAA2B,KAAK;AAEnF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAGA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAKjC,QAAI,CAAC,OAAO,SAAS,OAAO,OAAO,cAAc,YAAY,CAAC,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC3F,YAAM,SAAS;AACf,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAEA,WAAO,QAAQ,4BAA4B,OAAO,QAAQ,OAAO,OAAK,EAAE,SAAS,iBAAiB,EAAE,MAAM,oBAAoB,KAAK;AACnI,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,OAAO;AAAA,EAC1B,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,0BAA0B,OAAO,IAAI,KAAK;AAC1D,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,MAAM,OAAO,0BAA0B,OAAO,GAAG;AAAA,EACpE;AACF;AAEA,eAAsB,+BACpB,YAKA,WACA,iBACA,WAC8D;AAC9D,QAAM,kBAAkB,WAAW,MAAM,GAAG,EAAE;AAC9C,QAAM,QAAQ,gBAAgB,IAAI,CAAC,QAAQ;AACzC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,WAAO,IAAI,IAAI,IAAI,KAAK,IAAI,KAAK,WAAM,MAAM;AAAA,EAC/C,CAAC;AAED,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aASJ,SAAS;AAAA;AAAA;AAAA,EAGpB,gBAAgB,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA,EAGvD,MAAM,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAShB,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,OAAO;AAAA,IACrC;AAEA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,MAAM,QAAQ,OAAO,cAAc,IAAI,OAAO,iBAAiB,CAAC,EAAE;AAAA,EACrF,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,4CAA4C,OAAO,IAAI,KAAK;AAC5E,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,QAAQ;AAAA,EACtC;AACF;AA0BA,MAAM,qBAAqB,oBAAI,IAAI;AAAA,EACjC;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAa;AAAA,EAAa;AAAA,EAAW;AAAA,EACpD;AAAA,EAAa;AAAA,EAAkB;AACjC,CAAC;AAED,MAAM,kBAAkB,oBAAI,IAAI,CAAC,QAAQ,SAAS,UAAU,OAAO,CAAC;AACpE,MAAM,iBAAiB,oBAAI,IAAmB,CAAC,UAAU,OAAO,MAAM,CAAC;AACvE,MAAM,mBAAmB,oBAAI,IAAI,CAAC,cAAc,cAAc,CAAC;AAE/D,SAAS,cAAc,OAAmC;AACxD,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM,OAAO,MAAM,QAAQ;AACzE;AAEA,SAAS,YAAY,OAA8C;AACjE,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM;AAChD,QAAI,OAAO,MAAM,YAAY,MAAM,KAAM,QAAO;AAChD,UAAM,OAAQ,EAA8B;AAC5C,UAAM,SAAU,EAA8B;AAC9C,WAAO,OAAO,SAAS,YAClB,iBAAiB,IAAI,IAAI,KACzB,OAAO,WAAW,YAClB,OAAO,KAAK,EAAE,SAAS;AAAA,EAC9B,CAAC;AACH;AAEO,SAAS,mBAAmB,KAAmC;AACpE,MAAI;AACF,UAAM,UAAU,IAAI,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACxF,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,UAAM,aAAa,OAAO,OAAO,eAAe,WAAW,OAAO,aAAa;AAC/E,QAAI,CAAC,cAAc,CAAC,mBAAmB,IAAI,UAAU,EAAG,QAAO;AAE/D,UAAM,mBAAmB,OAAO,OAAO,qBAAqB,WAAW,OAAO,mBAAmB;AACjG,QAAI,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,gBAAgB,EAAG,QAAO;AAExE,UAAM,iBAAiB,OAAO;AAC9B,QAAI,OAAO,mBAAmB,YAAY,CAAC,eAAe,IAAI,cAA+B,EAAG,QAAO;AAEvG,QAAI,CAAC,YAAY,OAAO,mBAAmB,KAAK,OAAO,oBAAoB,WAAW,EAAG,QAAO;AAChG,QAAI,CAAC,cAAc,OAAO,aAAa,KAAK,OAAO,cAAc,WAAW,EAAG,QAAO;AAEtF,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,OAAO,OAAO,sBAAsB,WAAW,OAAO,oBAAoB;AAAA,MAC7F;AAAA,MACA,uBAAuB,OAAO,OAAO,0BAA0B,WAAW,OAAO,wBAAwB;AAAA,MACzG;
AAAA,MACA,qBAAqB,OAAO;AAAA,MAC5B,eAAe,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAAA,MACrE,iBAAiB,cAAc,OAAO,eAAe,IAAI,OAAO,kBAAkB,CAAC;AAAA,MACnF,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,MAC7E,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,IAC/E;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,sBACpB,MACA,WACA,QAC+B;AAC/B,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAOT,IAAI;AAAA,SACH,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA0CZ,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO,WAAW,kDAAkD,SAAS,SAAS,SAAS,IAAI,KAAK;AACxG,qBAAe,WAAW,SAAS,SAAS,gBAAgB;AAC5D,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,mBAAmB,SAAS,OAAO;AACjD,QAAI,CAAC,OAAO;AACV,aAAO,WAAW,wDAAwD,KAAK;AAC/E,qBAAe,WAAW,+BAA+B;AACzD,aAAO;AAAA,IACT;AAEA,mBAAe,SAAS;AACxB,WAAO;AAAA,EACT,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,qCAAqC,OAAO,IAAI,KAAK;AACvE,mBAAe,WAAW,OAAO;AACjC,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBAAoB,OAA8B;AAChE,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,MAAM,UAAU,aAAQ,MAAM,iBAAiB,EAAE;AACjF,QAAM,KAAK,yBAAyB,MAAM,cAAc,aAAQ,MAAM,qBAAqB,EAAE;AAC7F,QAAM,KAAK,oBAAoB,MAAM,gBAAgB,IAAI;AACzD,QAAM,KAAK,EAAE;AAEb,MAAI,MAAM,oBAAoB,SAAS,GAAG;AACxC,UAAM,KAAK,yBAAyB;AACpC,UAAM,oBAAoB,QAAQ,CAAC,MAAM,MAAM;AAC7C,YAAM,KAAK,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,aAAQ,KAAK,MAAM,EAAE;AAAA,IAC1D,CAAC;AACD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,sBAAsB,MAAM,cAAc,MAAM,mFAA8E;AACzI,eAAW,QAAQ,MAAM,eAAe;AACtC,YAAM,KAAK,KAAK,IAAI,EAAE;AAAA,IACxB;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,gBAAgB,SAAS,GAAG;AACpC,UAAM,KAAK,sFAAuF;AAClG,eAAW,QAAQ,MAAM,gBAAiB,OAAM,KAAK,KAAK,IAAI,EAAE;AAChE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,OAAO,MAAM,cAAe,OAAM,KAAK,KAAK,GAAG,EAAE;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,KAAK,MAAM,cAAe,OAAM,KAAK,KAAK,CAAC,EAAE;AACxD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iLAAiL;AAE5L,SAAO,MAAM,KAAK,IAAI;AACxB;",
4
+ "sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses OpenRouter via OPENROUTER_API_KEY for AI-powered content filtering\n * Implements robust retry logic and NEVER throws\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~25k tokens) */\nconst MAX_LLM_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 120_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 15_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 30_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. */\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 
'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. */\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n// OpenRouter/OpenAI specific retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n const requestBody: Record<string, unknown> = {\n model,\n messages: [{ role: 'user', content: prompt }],\n };\n\n if (LLM_EXTRACTION.REASONING_EFFORT !== 'none') {\n requestBody.reasoning_effort = LLM_EXTRACTION.REASONING_EFFORT;\n }\n\n return requestBody;\n}\n\nexport async function requestText(\n processor: 
OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if (error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from OpenAI/OpenRouter\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? 
record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_EXTRACTION_API_KEY or OPENROUTER_API_KEY not set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to avoid token limits\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). 
Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) 
\"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Content clearly failed to load: return ONLY a single line, choosing from:\n \\`## Matches\\\\n_Page did not load: 404_\\`\n \\`## Matches\\\\n_Page did not load: login-wall_\\`\n \\`## Matches\\\\n_Page did not load: paywall_\\`\n \\`## Matches\\\\n_Page did not load: JS-render-empty_\\`\n \\`## Matches\\\\n_Page did not load: non-text-asset_\\`\n \\`## Matches\\\\n_Page did not load: truncated-before-relevant-section_\\`\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Retry loop\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(\n processor,\n prompt,\n 'LLM extraction',\n signal,\n );\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response - not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n\n // Log the error\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? String((err as Record<string, unknown>).code)\n : undefined;\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}]`, 'llm');\n\n // Check if we should retry\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n\n // Non-retryable or max retries reached\n break;\n }\n }\n\n // All attempts failed - return original content with error info\n const errorMessage = lastError?.message || 'Unknown LLM error';\n mcpLog('error', `All attempts failed: ${errorMessage}. 
Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content, // Return original content as fallback\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. 
Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. 
Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestText(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. 
Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestText(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly string[];\n}\n\nconst VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, 
unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. 
product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n- For \\`site:<domain>\\` filters, ONLY use domains you are highly confident are real. Safe choices: \\`github.com\\`, \\`stackoverflow.com\\`, \\`reddit.com\\`, \\`news.ycombinator.com\\`, \\`arxiv.org\\`, \\`nvd.nist.gov\\`, \\`pypi.org\\`, \\`npmjs.com\\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names \"Cursor\" \u2192 \\`cursor.com\\`/\\`docs.cursor.com\\` is acceptable). If you don't know the product's real docs domain, leave the query open (no \\`site:\\`) instead of guessing.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestText(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. 
\\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
5
+ "mappings": "AAMA,OAAO,YAAY;AACnB,SAAS,gBAAgB,uBAAuB;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,cAAc;AAGvB,MAAM,sBAAsB;AAG5B,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB;AAG9B,MAAM,uBAAuB;AAG7B,MAAM,0BAA0B;AAuBhC,MAAM,YAAY;AAAA,EAChB,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,sBAAsB;AAAA,EACtB,wBAAwB;AAAA,EACxB,kBAAkB;AAAA,EAClB,oBAAoB;AAAA,EACpB,4BAA4B;AAAA,EAC5B,8BAA8B;AAChC;AAEO,SAAS,eAAe,MAA2B;AACxD,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,6BAA6B;AAAA,EACzC,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,+BAA+B;AAAA,EAC3C;AACF;AAEO,SAAS,eAAe,MAAqB,KAAoB;AACtE,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,OAAO,eAAe;AAClF,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,8BAA8B;AAAA,EAC1C,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,gCAAgC;AAAA,EAC5C;AACF;AAEO,SAAS,eAAkC;AAChD,QAAM,MAAM,gBAAgB;AAC5B,SAAO;AAAA,IACL,eAAe,UAAU;AAAA,IACzB,iBAAiB,UAAU;AAAA,IAC3B,sBAAsB,UAAU;AAAA,IAChC,wBAAwB,UAAU;AAAA,IAClC,kBAAkB,UAAU;AAAA,IAC5B,oBAAoB,UAAU;AAAA;AAAA;AAAA,IAG9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,4BAA4B,UAAU;AAAA,IACtC,8BAA8B,UAAU;AAAA,EAC1C;AACF;AAGO,SAAS,0BAAgC;AAC9C,YAAU,gBAAgB;AAC1B,YAAU,kBAAkB;AAC5B,YAAU,uBAAuB;AACjC,YAAU,yBAAyB;AACnC,YAAU,mBAAmB;AAC7B,YAAU,qBAAqB;AAC/B,YAAU,6BAA6B;AACvC,YAAU,+BAA+B;AAC3C;AAgBA,MAAM,mBAAmB;AAAA,EACvB,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,YAAY;AACd;AAGA,MAAM,4BAA4B,oBAAI,IAAI;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGD,SAAS,UAAU,OAA6C;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,YAAY,SACZ,OAAQ,MAAkC,WAAW;AAEzD;AAEA,IAAI,YAA2B;AAIxB,SAAS,qBAAoC;AAClD,MAAI,CAAC,gBAAgB,EAAE,cAAe,QAAO;AAE7C,MAAI,CAAC,WAAW;AACd,gBAAY,IAAI,OAAO;AAAA,MACrB,SAAS,eAAe;AAAA,MACxB,QAAQ,eAAe;AAAA,MACvB,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,gBAAgB,EAAE,WAAW,yBAAyB;AAAA,IACxD,CAAC;AACD,WAAO,QAAQ,qCAAqC,eAAe,KAAK,cAAc,eAAe,QAAQ,KAAK,KAAK;AAAA,EACzH;AACA,SAAO;AACT;AAEA,SAAS,qBAAqB,OAAe,QAAyC;AACpF,QAAM,cAAuC;AAAA,IAC3C;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,EAC9C;AAEA,MAAI,eAAe,qBAAqB,QAAQ;AAC9C,gBAAY,mBAAmB,eAAe;AAAA,EAChD;AAEA,SAAO;AACT;AAEA,eAAsB,YACpB,WACA,QACA,gBACA,QACoE;AACpE,QAAM,QAAQ,eAAe;AAE7B,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB,CAAC,gBAAgB,UAAU,KAAK,YAAY;AAAA,QAC1C,qBAAqB,OAAO,MAAM;AAAA,QAClC;AAAA,UACE,QAAQ,SAAS,YAAY,IAAI,CAAC,aAAa,MAAM,CAAC,IAAI;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG,cAAc,KAAK,KAAK;AAAA,IAC7B;AAEA,UAAM,UAAU,SAAS,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK;AAC9D,QAAI,SAAS;AACX,aAAO,EAAE,SAAS,MAAM;AAAA,IAC1B;AAEA,UAAM,MAAM,6BAA6B,KAAK;AAC9C,WAAO,WAAW,GAAG,cAAc,qCAAqC,KAAK,IAAI,KAAK;AACtF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,IAAI;AAAA,EAC5C,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,GAAG,cAAc,qBAAqB,KAAK,KAAK,OAAO,IAAI,KAAK;AAClF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,QAAQ;AAAA,EAChD;AACF;AAKA,SAAS,oBAAoB,OAAyB;AACpD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAGhD,QAAM,YAAa,OAA6B;AAChD,MAAI,cAAc,cAAc,cAAc,aAAa;AACzD,WAAO;AAAA,EACT;AAGA,MAAI,UAAU,KAAK,GAAG;AACpB,QAAI,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,KAAK;AACxH,aAAO;AAAA,IACT;AAAA,EACF;AAGA,QAAM,SAAS;AACf,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AACN,QAAM,YACJ,SACC,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO,YAC1D,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,MAAI,aAAa,0BAA0B,IAAI,SAAS,GAAG;AACzD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,OAAO,OAAO,YAAY,WAAW,OAAO,QAAQ,YAAY,IAAI;AACpF,MACE,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,SAAS,KAC1B,QAA
Q,SAAS,WAAW,KAC5B,QAAQ,SAAS,qBAAqB,KACtC,QAAQ,SAAS,cAAc,KAC/B,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,YAAY,GAC7B;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAKA,SAAS,oBAAoB,SAAyB;AACpD,QAAM,mBAAmB,iBAAiB,cAAc,KAAK,IAAI,GAAG,OAAO;AAC3E,QAAM,SAAS,KAAK,OAAO,IAAI,wBAAwB;AACvD,SAAO,KAAK,IAAI,mBAAmB,QAAQ,iBAAiB,UAAU;AACxE;AAOA,eAAsB,sBACpB,SACA,QACA,WACA,QACoB;AAEpB,MAAI,CAAC,OAAO,SAAS;AACnB,WAAO,EAAE,SAAS,WAAW,MAAM;AAAA,EACrC;AAEA,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL;AAAA,MACA,WAAW;AAAA,MACX,OAAO;AAAA,MACP,cAAc;AAAA,QACZ,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO,EAAE,SAAS,WAAW,IAAI,WAAW,OAAO,OAAO,yBAAyB;AAAA,EACrF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,sBACtC,QAAQ,UAAU,GAAG,mBAAmB,IAAI,0CAC5C;AAKJ,QAAM,WAAW,MAAM;AACrB,QAAI,CAAC,OAAO,IAAK,QAAO;AACxB,QAAI;AACF,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,aAAO,GAAG,EAAE,MAAM,GAAG,EAAE,QAAQ;AAAA,IACjC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF,GAAG;AACH,QAAM,UAAU,UAAU,aAAa,OAAO;AAAA;AAAA,IAAS;AAEvD,QAAM,SAAS,OAAO,UAClB;AAAA;AAAA,EAEJ,OAAO,2BAA2B,OAAO,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAsDhD,gBAAgB,KACZ;AAAA;AAAA,EAEJ,OAAO;AAAA,EACP,gBAAgB;AAEhB,MAAI;AAGJ,WAAS,UAAU,GAAG,WAAW,iBAAiB,YAAY,WAAW;AACvE,QAAI;AACF,UAAI,YAAY,GAAG;AACjB,eAAO,QAAQ,4BAA4B,eAAe,KAAK,IAAI,KAAK;AAAA,MAC1E,OAAO;AACL,eAAO,WAAW,iBAAiB,OAAO,IAAI,iBAAiB,UAAU,IAAI,KAAK;AAAA,MACpF;AAEA,YAAM,WAAW,MAAM;AAAA,QACrB;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,UAAI,SAAS,SAAS;AACpB,eAAO,QAAQ,0BAA0B,SAAS,QAAQ,MAAM,eAAe,KAAK;AACpF,uBAAe,WAAW;AAC1B,eAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,MACtD;AAGA,aAAO,WAAW,oCAAoC,KAAK;AAC3D,qBAAe,aAAa,6BAA6B;AACzD,aAAO;AAAA,QACL;AAAA,QACA,WAAW;AAAA,QACX,OAAO;AAAA,QACP,cAAc;AAAA,UACZ,MAAM,UAAU;AAAA,UAChB,SAAS;AAAA,UACT,WAAW;AAAA,QACb;AAAA,MACF;AAAA,IAEF,SAAS,KAAc;AACrB,kBAAY,cAAc,GAAG;AAG7B,YAAM,SAAS,UAAU,GAAG,IAAI,IAAI,SAAS;AAC7C,YAAM,OAAO,OAAO,QAAQ,YAAY,QAAQ,QAAQ,UAAU,MAC9D,OAAQ,IAAgC,IAAI,IAC5C;AACJ,aAAO,SAAS,kBAAkB,UAAU,CAAC,MAAM,UAAU,OAAO,YAAY,MAAM,UAAU,IAAI,eAAe,oBAAoB,GAAG,CAAC,KAAK,KAAK;AAGrJ,UAAI,oBAAoB,GAAG,KAAK,UAAU,iBAAiB,YAAY;AACrE,cAAM,UAAU,oBAAoB,OAAO;AAC3C,eAAO,WAAW,eAAe,OAAO,SAAS,KAAK;AACtD,YAAI;AAAE,gBAAM,MAAM,SAAS,MAAM;AAAA,QAAG,QAAQ;AAAE;AAAA,QAAO;AACrD;AAAA,MACF;AAGA;AAAA,IACF;AAAA,EACF;AAGA,QAAM,eAAe,WAAW,WAAW;AAC3C,SAAO,SAAS,wBAAwB,YAAY,iCAAiC,KAAK;AAC1F,iBAAe,aAAa,YAAY;AAExC,SAAO;AAAA,IACL;AAAA;AAAA,IACA,WAAW;AAAA,IACX,OAAO,0BAA0B,YAAY;AAAA,IAC7C,cAAc,aAAa;AAAA,MACzB,MAAM,UAAU;AAAA,MAChB,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAOA,MAAM,0BAA0B;AA2ChC,eAAsB,sBACpB,YAQA,WACA,cACA,WACA,kBAAqC,CAAC,GAC4B;AAClE,QAAM,iBAAiB,WAAW,MAAM,GAAG,uBAAuB;AAMlE,QAAM,iBAAiB,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AACxD,QAAM,gBAAgB,CAAC,SAAyB,eAAe,OAAO,CAAC,KAAK;AAG5E,QAAM,QAAkB,CAAC;AACzB,aAAW,OAAO,gBAAgB;AAChC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,UAAM,UAAU,IAAI,QAAQ,SAAS,MACjC,IAAI,QAAQ,MAAM,GAAG,GAAG,IAAI,QAC5B,IAAI;AACR,UAAM,KAAK,IAAI,IAAI,IAAI,OAAO,cAAc,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,WAAM,MAAM,WAAM,OAAO,EAAE;AAAA,EAC/F;AAEA,QAAM,mBAAmB,gBAAgB,SAAS,IAC9C,gBAAgB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC9C;AACJ,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA,aAEJ,SAAS;AAAA,SACb,KAAK;AAAA;AAAA;AAAA,EAGZ,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;A
AAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iBAmDD,eAAe,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAMpB,eAAe,MAAM,cAAc,YAAY;AAAA,EAC/D,MAAM,KAAK,IAAI,CAAC;AAEhB,MAAI;AACF,WAAO,QAAQ,eAAe,eAAe,MAAM,2BAA2B,KAAK;AAEnF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAGA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAKjC,QAAI,CAAC,OAAO,SAAS,OAAO,OAAO,cAAc,YAAY,CAAC,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC3F,YAAM,SAAS;AACf,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAEA,WAAO,QAAQ,4BAA4B,OAAO,QAAQ,OAAO,OAAK,EAAE,SAAS,iBAAiB,EAAE,MAAM,oBAAoB,KAAK;AACnI,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,OAAO;AAAA,EAC1B,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,0BAA0B,OAAO,IAAI,KAAK;AAC1D,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,MAAM,OAAO,0BAA0B,OAAO,GAAG;AAAA,EACpE;AACF;AAEA,eAAsB,+BACpB,YAKA,WACA,iBACA,WAC8D;AAC9D,QAAM,kBAAkB,WAAW,MAAM,GAAG,EAAE;AAC9C,QAAM,QAAQ,gBAAgB,IAAI,CAAC,QAAQ;AACzC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,WAAO,IAAI,IAAI,IAAI,KAAK,IAAI,KAAK,WAAM,MAAM;AAAA,EAC/C,CAAC;AAED,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aASJ,SAAS;AAAA;AAAA;AAAA,EAGpB,gBAAgB,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA,EAGvD,MAAM,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAShB,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,OAAO;AAAA,IACrC;AAEA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,MAAM,QAAQ,OAAO,cAAc,IAAI,OAAO,iBAAiB,CAAC,EAAE;AAAA,EACrF,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,4CAA4C,OAAO,IAAI,KAAK;AAC5E,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,QAAQ;AAAA,EACtC;AACF;AA0BA,MAAM,qBAAqB,oBAAI,IAAI;AAAA,EACjC;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAa;AAAA,EAAa;AAAA,EAAW;AAAA,EACpD;AAAA,EAAa;AAAA,EAAkB;AACjC,CAAC;AAED,MAAM,kBAAkB,oBAAI,IAAI,CAAC,QAAQ,SAAS,UAAU,OAAO,CAAC;AACpE,MAAM,iBAAiB,oBAAI,IAAmB,CAAC,UAAU,OAAO,MAAM,CAAC;AACvE,MAAM,mBAAmB,oBAAI,IAAI,CAAC,cAAc,cAAc,CAAC;AAE/D,SAAS,cAAc,OAAmC;AACxD,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM,OAAO,MAAM,QAAQ;AACzE;AAEA,SAAS,YAAY,OAA8C;AACjE,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM;AAChD,QAAI,OAAO,MAAM,YAAY,MAAM,KAAM,QAAO;AAChD,UAAM,OAAQ,EAA8B;AAC5C,UAAM,SAAU,EAA8B;AAC9C,WAAO,OAAO,SAAS,YAClB,iBAAiB,IAAI,IAAI,KACzB,OAAO,WAAW,YAClB,OAAO,KAAK,EAAE,SAAS;AAAA,EAC9B,CAAC;AACH;AAEO,SAAS,mBAAmB,KAAmC;AACpE,MAAI;AACF,UAAM,UAAU,IAAI,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACxF,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,UAAM,aAAa,OAAO,OAAO,eAAe,WAAW,OAAO,aAAa;AAC/E,QAAI,CAAC,cAAc,CAAC,mBAAmB,IAAI,UAAU,EAAG,QAAO;AAE/D,UAAM,mBAAmB,OAAO,OAAO,qBAAqB,WAAW,OAAO,mBAAmB;AACjG,QAAI,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,gBAAgB,EAAG,QAAO;AAExE,UAAM,iBAAiB,OAAO;AAC9B,QAAI,OAAO,mBAAmB,YAAY,CAAC,eAAe,IAAI,cAA+B,EAAG,QAAO;AAEvG,QAAI,CAAC,YAAY,OAAO,mBAAmB,KAAK,OAAO,oBAAoB,WAAW,EAAG,QAAO;AAChG,QAAI,CAAC,cAAc,OAAO,aAAa,KAAK,OAAO,cAAc,WAAW,EAAG,QAAO;AAEtF,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,OAAO,OAAO,sBAAsB,WAAW,OAAO,oBAAoB;AAAA,MAC7F;AAAA,MACA,uBAAuB,OAAO,OAAO,0BAA0B,WAAW,OAAO,wBAAwB;AAAA,MACzG;
AAAA,MACA,qBAAqB,OAAO;AAAA,MAC5B,eAAe,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAAA,MACrE,iBAAiB,cAAc,OAAO,eAAe,IAAI,OAAO,kBAAkB,CAAC;AAAA,MACnF,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,MAC7E,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,IAC/E;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,sBACpB,MACA,WACA,QAC+B;AAC/B,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAOT,IAAI;AAAA,SACH,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA2CZ,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO,WAAW,kDAAkD,SAAS,SAAS,SAAS,IAAI,KAAK;AACxG,qBAAe,WAAW,SAAS,SAAS,gBAAgB;AAC5D,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,mBAAmB,SAAS,OAAO;AACjD,QAAI,CAAC,OAAO;AACV,aAAO,WAAW,wDAAwD,KAAK;AAC/E,qBAAe,WAAW,+BAA+B;AACzD,aAAO;AAAA,IACT;AAEA,mBAAe,SAAS;AACxB,WAAO;AAAA,EACT,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,qCAAqC,OAAO,IAAI,KAAK;AACvE,mBAAe,WAAW,OAAO;AACjC,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBAAoB,OAA8B;AAChE,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,MAAM,UAAU,aAAQ,MAAM,iBAAiB,EAAE;AACjF,QAAM,KAAK,yBAAyB,MAAM,cAAc,aAAQ,MAAM,qBAAqB,EAAE;AAC7F,QAAM,KAAK,oBAAoB,MAAM,gBAAgB,IAAI;AACzD,QAAM,KAAK,EAAE;AAEb,MAAI,MAAM,oBAAoB,SAAS,GAAG;AACxC,UAAM,KAAK,yBAAyB;AACpC,UAAM,oBAAoB,QAAQ,CAAC,MAAM,MAAM;AAC7C,YAAM,KAAK,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,aAAQ,KAAK,MAAM,EAAE;AAAA,IAC1D,CAAC;AACD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,sBAAsB,MAAM,cAAc,MAAM,mFAA8E;AACzI,eAAW,QAAQ,MAAM,eAAe;AACtC,YAAM,KAAK,KAAK,IAAI,EAAE;AAAA,IACxB;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,gBAAgB,SAAS,GAAG;AACpC,UAAM,KAAK,sFAAuF;AAClG,eAAW,QAAQ,MAAM,gBAAiB,OAAM,KAAK,KAAK,IAAI,EAAE;AAChE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,OAAO,MAAM,cAAe,OAAM,KAAK,KAAK,GAAG,EAAE;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,KAAK,MAAM,cAAe,OAAM,KAAK,KAAK,CAAC,EAAE;AACxD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iLAAiL;AAE5L,SAAO,MAAM,KAAK,IAAI;AACxB;",
6
6
  "names": []
7
7
  }
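The map above embeds the updated `src/services/llm-processor.ts`, which is where the health-tracking change in this release lives: the module now keeps `consecutivePlannerFailures` and `consecutiveExtractorFailures` next to the existing last-check fields, increments the matching counter in `markLLMFailure`, resets it to 0 in `markLLMSuccess`, and returns both through `getLLMHealth()`. The module header notes the snapshot is surfaced via `health://status` so capability-aware clients can branch on degraded mode without parsing per-call footers. A minimal sketch of the counter lifecycle, assuming the module is imported from the package's `src/services/` tree (the error values are placeholders):

```ts
import {
  getLLMHealth,
  markLLMFailure,
  markLLMSuccess,
} from './src/services/llm-processor.js';

// Two failures in a row: the counter tracks failures since the last success.
markLLMFailure('planner', new Error('rate limit exceeded'));
markLLMFailure('planner', new Error('timed out'));
console.log(getLLMHealth().consecutivePlannerFailures); // 2
console.log(getLLMHealth().lastPlannerOk);              // false

// A success resets the counter and clears the stored error message.
markLLMSuccess('planner');
console.log(getLLMHealth().consecutivePlannerFailures); // 0
console.log(getLLMHealth().lastPlannerError);           // null
```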
@@ -308,7 +308,7 @@ function buildScrapeResponse(params, contents, metrics, llmErrors, executionTime
308
308
  execution_time_ms: executionTime,
309
309
  total_credits: metrics.totalCredits
310
310
  };
311
- return { content: formattedContent, structuredContent: { content: formattedContent, metadata } };
311
+ return { content: formattedContent, structuredContent: { metadata } };
312
312
  }
313
313
  async function handleScrapeLinks(params, reporter = NOOP_REPORTER) {
314
314
  const startTime = Date.now();
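This hunk is the other behavioral change in the release: `buildScrapeResponse` stops duplicating the formatted markdown inside `structuredContent`, which previously carried a second full copy of the scraped output alongside the top-level `content` field. A before/after sketch of the returned shape, with placeholder values (only the object shapes come from the diff; the `metadata` keys match the `ScrapeLinksOutput['metadata']` object in the `scrape.ts` source below):

```ts
// Placeholder values; only the object shapes come from the diff.
const formattedContent = '# Scraping Complete\n\n## https://example.com\n\n...';
const metadata = {
  total_items: 2,
  successful: 2,
  failed: 0,
  execution_time_ms: 1843,
  total_credits: 2,
};

// 6.0.1: the full markdown was serialized twice in every response.
const v601 = {
  content: formattedContent,
  structuredContent: { content: formattedContent, metadata },
};

// 6.0.3: structuredContent carries only the metadata block, so large scrape
// results are no longer sent over the wire twice.
const v603 = {
  content: formattedContent,
  structuredContent: { metadata },
};
```

For multi-URL scrapes the markdown dominates the payload, so dropping the duplicate can roughly halve the size of large responses.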
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../../../src/tools/scrape.ts"],
4
- "sourcesContent": ["/**\n * Scrape Links Tool Handler\n *\n * Scrapes many URLs in parallel. Reddit permalinks (reddit.com/r/.../comments/...)\n * are auto-detected and routed through the Reddit API; all other URLs go through\n * the scraper. Both branches feed the same per-URL LLM extraction pipeline.\n *\n * NEVER throws \u2014 every error is returned as a tool-level failure response.\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport {\n SCRAPER,\n CONCURRENCY,\n getCapabilities,\n getMissingEnvMessage,\n parseEnv,\n} from '../config/index.js';\nimport {\n scrapeLinksOutputSchema,\n scrapeLinksParamsSchema,\n type ScrapeLinksParams,\n type ScrapeLinksOutput,\n} from '../schemas/scrape-links.js';\nimport { ScraperClient } from '../clients/scraper.js';\nimport { RedditClient, type PostResult } from '../clients/reddit.js';\nimport { MarkdownCleaner } from '../services/markdown-cleaner.js';\nimport { createLLMProcessor, processContentWithLLM } from '../services/llm-processor.js';\nimport { removeMetaTags } from '../utils/markdown-formatter.js';\nimport { extractReadableContent } from '../utils/content-extractor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { pMap } from '../utils/concurrency.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\nconst markdownCleaner = new MarkdownCleaner();\n\nfunction enhanceExtractionInstruction(instruction: string | undefined): string {\n const base = instruction || 'Extract the main content and key information from this page.';\n return `${SCRAPER.EXTRACTION_PREFIX}\\n\\n${base}\\n\\n${SCRAPER.EXTRACTION_SUFFIX}`;\n}\n\n// --- Types ---\n\ninterface ProcessedResult {\n url: string;\n content: string;\n index: number; // original position in params.urls[]\n}\n\ninterface ScrapeMetrics {\n successful: number;\n failed: number;\n totalCredits: number;\n}\n\ninterface ScrapePhaseResult {\n successItems: ProcessedResult[];\n failedContents: string[];\n metrics: ScrapeMetrics;\n}\n\ninterface BranchInput {\n url: string;\n origIndex: number;\n}\n\ninterface ScrapeClients {\n client: ScraperClient;\n llmProcessor: ReturnType<typeof createLLMProcessor>;\n}\n\n// --- Reddit URL detection ---\n\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+/i;\n\nfunction isRedditUrl(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname);\n } catch {\n return false;\n }\n}\n\nfunction isRedditPostPermalink(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname) && REDDIT_POST_PERMALINK.test(u.pathname);\n } catch {\n return false;\n }\n}\n\n// --- Error helper ---\n\nfunction createScrapeErrorResponse(\n code: string,\n message: string,\n startTime: number,\n retryable = false,\n alternatives?: string[],\n): ToolExecutionResult<ScrapeLinksOutput> {\n return toolFailure(\n `${formatError({\n code,\n message,\n retryable,\n toolName: 'scrape-links',\n howToFix: code === 'NO_URLS' ? 
['Provide at least one valid URL'] : undefined,\n alternatives,\n })}\\n\\nExecution time: ${formatDuration(Date.now() - startTime)}`,\n );\n}\n\n// --- URL partitioning ---\n\ninterface PartitionedUrls {\n webInputs: BranchInput[];\n redditInputs: BranchInput[];\n invalidEntries: { url: string; origIndex: number }[];\n}\n\nfunction partitionUrls(urls: string[]): PartitionedUrls {\n const webInputs: BranchInput[] = [];\n const redditInputs: BranchInput[] = [];\n const invalidEntries: { url: string; origIndex: number }[] = [];\n\n for (let i = 0; i < urls.length; i++) {\n const url = urls[i]!;\n try {\n new URL(url);\n } catch {\n invalidEntries.push({ url, origIndex: i });\n continue;\n }\n if (isRedditUrl(url)) {\n redditInputs.push({ url, origIndex: i });\n } else {\n webInputs.push({ url, origIndex: i });\n }\n }\n\n return { webInputs, redditInputs, invalidEntries };\n}\n\n// --- Web branch ---\n\nasync function fetchWebBranch(\n inputs: BranchInput[],\n client: ScraperClient,\n): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n mcpLog('info', `[concurrency] web branch: fanning out ${inputs.length} URL(s) with limit=${CONCURRENCY.SCRAPER}`, 'scrape');\n const urls = inputs.map((i) => i.url);\n const results = await client.scrapeMultiple(urls, { timeout: 60 });\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n let successful = 0;\n let failed = 0;\n let totalCredits = 0;\n\n for (let i = 0; i < results.length; i++) {\n const result = results[i];\n const origIndex = inputs[i]!.origIndex;\n if (!result) {\n failed++;\n failedContents.push(`## ${inputs[i]!.url}\\n\\n\u274C No result returned`);\n continue;\n }\n\n if (result.error || result.statusCode < 200 || result.statusCode >= 300) {\n failed++;\n const errorMsg = result.error?.message || result.content || `HTTP ${result.statusCode}`;\n failedContents.push(`## ${result.url}\\n\\n\u274C Failed to scrape: ${errorMsg}`);\n continue;\n }\n\n successful++;\n totalCredits += result.credits;\n\n let content: string;\n try {\n const readable = extractReadableContent(result.content, result.url);\n const sourceForCleaner = readable.extracted ? readable.content : result.content;\n content = markdownCleaner.processContent(sourceForCleaner);\n } catch {\n content = result.content;\n }\n\n successItems.push({ url: result.url, content, index: origIndex });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits } };\n}\n\n// --- Reddit branch ---\n\nfunction formatRedditPostAsMarkdown(result: PostResult): string {\n const { post, comments } = result;\n const lines: string[] = [];\n lines.push(`# ${post.title}`);\n lines.push('');\n lines.push(`**r/${post.subreddit}** \u2022 u/${post.author} \u2022 \u2B06\uFE0F ${post.score} \u2022 \uD83D\uDCAC ${post.commentCount} comments`);\n lines.push(`\uD83D\uDD17 ${post.url}`);\n lines.push('');\n if (post.body) {\n lines.push('## Post content');\n lines.push('');\n lines.push(post.body);\n lines.push('');\n }\n if (comments.length > 0) {\n lines.push(`## Top comments (${comments.length} total)`);\n lines.push('');\n for (const c of comments) {\n const indent = ' '.repeat(c.depth);\n const op = c.isOP ? ' **[OP]**' : '';\n const score = c.score >= 0 ? 
`+${c.score}` : `${c.score}`;\n lines.push(`${indent}- **u/${c.author}**${op} _(${score})_`);\n for (const line of c.body.split('\\n')) {\n lines.push(`${indent} ${line}`);\n }\n lines.push('');\n }\n }\n return lines.join('\\n');\n}\n\nasync function fetchRedditBranch(inputs: BranchInput[]): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n const env = parseEnv();\n if (!env.REDDIT_CLIENT_ID || !env.REDDIT_CLIENT_SECRET) {\n const failedContents = inputs.map(\n (i) => `## ${i.url}\\n\\n\u274C Reddit URL detected, but Reddit API is not configured. Set \\`REDDIT_CLIENT_ID\\` and \\`REDDIT_CLIENT_SECRET\\` in the server env to enable threaded Reddit scraping.`,\n );\n return {\n successItems: [],\n failedContents,\n metrics: { successful: 0, failed: inputs.length, totalCredits: 0 },\n };\n }\n\n // Warn for non-permalink Reddit URLs (subreddit homepages, /new, /top, /hot,\n // user profiles). The Reddit API path we call requires /r/.../comments/... \u2014\n // reject upfront so the caller sees a helpful message instead of a 404.\n const [postInputs, nonPermalinks] = inputs.reduce<[BranchInput[], BranchInput[]]>(\n ([posts, rest], input) => {\n if (isRedditPostPermalink(input.url)) posts.push(input);\n else rest.push(input);\n return [posts, rest];\n },\n [[], []],\n );\n\n const nonPermalinkFailed = nonPermalinks.map(\n (i) => `## ${i.url}\\n\\n\u274C Only Reddit post permalinks (/r/<sub>/comments/<id>/...) are supported. Use web-search with scope:\"reddit\" to discover post permalinks first.`,\n );\n\n if (postInputs.length === 0) {\n return {\n successItems: [],\n failedContents: nonPermalinkFailed,\n metrics: { successful: 0, failed: nonPermalinks.length, totalCredits: 0 },\n };\n }\n\n mcpLog('info', `[concurrency] reddit branch: fetching ${postInputs.length} post(s) with limit=${CONCURRENCY.REDDIT}`, 'scrape');\n const client = new RedditClient(env.REDDIT_CLIENT_ID, env.REDDIT_CLIENT_SECRET);\n const urls = postInputs.map((i) => i.url);\n const batchResult = await client.batchGetPosts(urls, true);\n const urlToIndex = new Map(postInputs.map((i) => [i.url, i.origIndex]));\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [...nonPermalinkFailed];\n let successful = 0;\n let failed = nonPermalinks.length;\n\n for (const [url, result] of batchResult.results) {\n const origIndex = urlToIndex.get(url) ?? -1;\n if (result instanceof Error) {\n failed++;\n failedContents.push(`## ${url}\\n\\n\u274C Reddit fetch failed: ${result.message}`);\n continue;\n }\n successful++;\n successItems.push({ url, content: formatRedditPostAsMarkdown(result), index: origIndex });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };\n}\n\n// --- LLM extraction (shared by both branches) ---\n\nasync function processItemsWithLlm(\n successItems: ProcessedResult[],\n enhancedInstruction: string,\n llmProcessor: ReturnType<typeof createLLMProcessor>,\n reporter: ToolReporter,\n): Promise<{ items: ProcessedResult[]; llmErrors: number; llmAttempted: number }> {\n let llmErrors = 0;\n\n if (!llmProcessor || successItems.length === 0) {\n if (!llmProcessor && successItems.length > 0) {\n mcpLog('warning', 'LLM unavailable (LLM_API_KEY not set). 
Returning raw scraped content.', 'scrape');\n void reporter.log('warning', 'llm_extractor_unreachable: planner not configured; raw scraped content returned');\n }\n return { items: successItems, llmErrors, llmAttempted: 0 };\n }\n\n mcpLog('info', `[concurrency] llm extraction: fanning out ${successItems.length} item(s) with limit=${CONCURRENCY.LLM_EXTRACTION}`, 'scrape');\n\n const llmResults = await pMap(\n successItems,\n async (item) => {\n mcpLog('debug', `LLM extracting ${item.url}...`, 'scrape');\n\n const llmResult = await processContentWithLLM(\n item.content,\n { enabled: true, extract: enhancedInstruction, url: item.url },\n llmProcessor,\n );\n\n if (llmResult.processed) {\n return { ...item, content: llmResult.content };\n }\n\n llmErrors++;\n mcpLog('warning', `LLM extraction failed for ${item.url}: ${llmResult.error || 'unknown reason'}`, 'scrape');\n void reporter.log('warning', `llm_extractor_unreachable: ${item.url} \u2014 ${llmResult.error || 'unknown reason'}`);\n return item;\n },\n CONCURRENCY.LLM_EXTRACTION,\n );\n\n return { items: llmResults, llmErrors, llmAttempted: successItems.length };\n}\n\n// --- Output assembly ---\n\nfunction assembleContentEntries(successItems: ProcessedResult[], failedContents: string[]): string[] {\n const sorted = [...successItems].sort((a, b) => a.index - b.index);\n const contents = [...failedContents];\n for (const item of sorted) {\n let content = item.content;\n try {\n content = removeMetaTags(content);\n } catch {\n // Use content as-is\n }\n contents.push(`## ${item.url}\\n\\n${content}`);\n }\n return contents;\n}\n\nfunction buildScrapeResponse(\n params: ScrapeLinksParams,\n contents: string[],\n metrics: ScrapeMetrics,\n llmErrors: number,\n executionTime: number,\n llmAccounting: { llmAttempted: number; llmSucceeded: boolean },\n): { content: string; structuredContent: ScrapeLinksOutput } {\n const llmExtras: Record<string, string | number> = {};\n if (llmAccounting.llmAttempted > 0) {\n const ok = llmAccounting.llmAttempted - llmErrors;\n llmExtras['LLM extraction'] = `${ok}/${llmAccounting.llmAttempted} succeeded`;\n if (!llmAccounting.llmSucceeded) {\n llmExtras['LLM credit'] = '0 charged (no extraction produced)';\n }\n } else if (llmErrors > 0) {\n llmExtras['LLM extraction failures'] = llmErrors;\n }\n\n const batchHeader = formatBatchHeader({\n title: `Scraped Content (${params.urls.length} URLs)`,\n totalItems: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n extras: {\n 'Credits used': metrics.totalCredits,\n ...llmExtras,\n },\n });\n\n const formattedContent = formatSuccess({\n title: 'Scraping Complete',\n summary: batchHeader,\n data: contents.join('\\n\\n---\\n\\n'),\n metadata: {\n 'Execution time': formatDuration(executionTime),\n },\n });\n\n const metadata: ScrapeLinksOutput['metadata'] = {\n total_items: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n execution_time_ms: executionTime,\n total_credits: metrics.totalCredits,\n };\n return { content: formattedContent, structuredContent: { content: formattedContent, metadata } };\n}\n\n// --- Handler ---\n\nexport async function handleScrapeLinks(\n params: ScrapeLinksParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<ScrapeLinksOutput>> {\n const startTime = Date.now();\n\n if (!params.urls || params.urls.length === 0) {\n return createScrapeErrorResponse('NO_URLS', 'No URLs provided', startTime);\n }\n\n const { webInputs, redditInputs, invalidEntries } = 
partitionUrls(params.urls);\n const validCount = webInputs.length + redditInputs.length;\n\n await reporter.log(\n 'info',\n `Partitioned ${params.urls.length} URL(s): ${webInputs.length} web, ${redditInputs.length} reddit, ${invalidEntries.length} invalid`,\n );\n\n if (validCount === 0) {\n return createScrapeErrorResponse(\n 'INVALID_URLS',\n `All ${params.urls.length} URLs are invalid`,\n startTime,\n false,\n [\n 'web-search(queries=[...], extract=\"...\") \u2014 search for valid URLs first, then scrape the results',\n ],\n );\n }\n\n mcpLog(\n 'info',\n `Starting scrape: ${webInputs.length} web + ${redditInputs.length} reddit URL(s)`,\n 'scrape',\n );\n await reporter.progress(15, 100, 'Preparing scraper clients');\n\n // Only initialize web clients if we actually have web URLs. Reddit-only\n // batches run without touching the scraper.\n let clients: ScrapeClients | null = null;\n try {\n if (webInputs.length > 0) {\n clients = { client: new ScraperClient(), llmProcessor: createLLMProcessor() };\n } else {\n // Reddit-only: no scraper needed, but still create the LLM processor\n // so the extraction pass runs.\n clients = {\n client: null as unknown as ScraperClient,\n llmProcessor: createLLMProcessor(),\n };\n }\n } catch (error) {\n const err = classifyError(error);\n return createScrapeErrorResponse(\n 'CLIENT_INIT_FAILED',\n `Failed to initialize scraper: ${err.message}`,\n startTime,\n false,\n [\n 'web-search(queries=[\"topic key findings\", \"topic summary\"], extract=\"key findings and summary\") \u2014 search instead of scraping',\n ],\n );\n }\n\n const enhancedInstruction = enhanceExtractionInstruction(params.extract);\n\n await reporter.progress(35, 100, 'Fetching page content');\n\n // Run both branches in parallel. Failures in one branch do not block the other.\n const [webPhase, redditPhase] = await Promise.all([\n webInputs.length > 0\n ? 
fetchWebBranch(webInputs, clients.client)\n : Promise.resolve<ScrapePhaseResult>({ successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } }),\n fetchRedditBranch(redditInputs),\n ]);\n\n const successItems = [...webPhase.successItems, ...redditPhase.successItems];\n const invalidFailed = invalidEntries.map(\n ({ url }) => `## ${url}\\n\\n\u274C Invalid URL format`,\n );\n const failedContents = [...invalidFailed, ...webPhase.failedContents, ...redditPhase.failedContents];\n const metrics: ScrapeMetrics = {\n successful: webPhase.metrics.successful + redditPhase.metrics.successful,\n failed: invalidEntries.length + webPhase.metrics.failed + redditPhase.metrics.failed,\n totalCredits: webPhase.metrics.totalCredits,\n };\n\n await reporter.log('info', `Fetched ${metrics.successful} page(s), ${metrics.failed} failed`);\n\n if (successItems.length > 0) {\n await reporter.progress(80, 100, 'Running LLM extraction over fetched pages');\n }\n\n const { items: processedItems, llmErrors, llmAttempted } = await processItemsWithLlm(\n successItems,\n enhancedInstruction,\n clients.llmProcessor,\n reporter,\n );\n\n const contents = assembleContentEntries(processedItems, failedContents);\n const executionTime = Date.now() - startTime;\n\n mcpLog(\n 'info',\n `Completed: ${metrics.successful} successful, ${metrics.failed} failed, ${metrics.totalCredits} credits used`,\n 'scrape',\n );\n\n const llmSucceeded = llmAttempted > 0 && llmErrors < llmAttempted;\n const result = buildScrapeResponse(\n params,\n contents,\n metrics,\n llmErrors,\n executionTime,\n { llmAttempted, llmSucceeded },\n );\n\n if (metrics.successful === 0 && metrics.failed > 0) {\n return toolFailure(result.content);\n }\n\n return toolSuccess(result.content, result.structuredContent);\n}\n\nexport function registerScrapeLinksTool(server: MCPServer): void {\n server.tool(\n {\n name: 'scrape-links',\n title: 'Scrape Links',\n description:\n 'Fetch many URLs in parallel and run per-URL structured LLM extraction. Auto-detects reddit.com post permalinks and routes them through the Reddit API (threaded post + comments); everything else flows through the HTTP scraper. Safe to call in parallel \u2014 group URLs by context rather than jamming unrelated batches together. Each page returns `## Source`, `## Matches` (verbatim-preserved facts), `## Not found` (explicit gaps), and `## Follow-up signals` (new terms + referenced URLs) that feed the next research loop. Describe the SHAPE of what you want in `extract`, facets separated by `|` (e.g. `root cause | affected versions | fix | workarounds | timeline`).',\n schema: scrapeLinksParamsSchema,\n outputSchema: scrapeLinksOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().scraping) {\n return toToolResponse(toolFailure(getMissingEnvMessage('scraping')));\n }\n\n const reporter = createToolReporter(ctx, 'scrape-links');\n const result = await handleScrapeLinks(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Scrape failed' : 'Scrape complete');\n return toToolResponse(result);\n },\n );\n}\n"],
- "mappings": "AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,qBAAqB;AAC9B,SAAS,oBAAqC;AAC9C,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,6BAA6B;AAC1D,SAAS,sBAAsB;AAC/B,SAAS,8BAA8B;AACvC,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACrB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAEP,MAAM,kBAAkB,IAAI,gBAAgB;AAE5C,SAAS,6BAA6B,aAAyC;AAC7E,QAAM,OAAO,eAAe;AAC5B,SAAO,GAAG,QAAQ,iBAAiB;AAAA;AAAA,EAAO,IAAI;AAAA;AAAA,EAAO,QAAQ,iBAAiB;AAChF;AAkCA,MAAM,cAAc;AACpB,MAAM,wBAAwB;AAE9B,SAAS,YAAY,KAAsB;AACzC,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ;AAAA,EACpC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,sBAAsB,KAAsB;AACnD,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ,KAAK,sBAAsB,KAAK,EAAE,QAAQ;AAAA,EAC9E,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAIA,SAAS,0BACP,MACA,SACA,WACA,YAAY,OACZ,cACwC;AACxC,SAAO;AAAA,IACL,GAAG,YAAY;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,UAAU,SAAS,YAAY,CAAC,gCAAgC,IAAI;AAAA,MACpE;AAAA,IACF,CAAC,CAAC;AAAA;AAAA,kBAAuB,eAAe,KAAK,IAAI,IAAI,SAAS,CAAC;AAAA,EACjE;AACF;AAUA,SAAS,cAAc,MAAiC;AACtD,QAAM,YAA2B,CAAC;AAClC,QAAM,eAA8B,CAAC;AACrC,QAAM,iBAAuD,CAAC;AAE9D,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,MAAM,KAAK,CAAC;AAClB,QAAI;AACF,UAAI,IAAI,GAAG;AAAA,IACb,QAAQ;AACN,qBAAe,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AACzC;AAAA,IACF;AACA,QAAI,YAAY,GAAG,GAAG;AACpB,mBAAa,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACzC,OAAO;AACL,gBAAU,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,cAAc,eAAe;AACnD;AAIA,eAAe,eACb,QACA,QAC4B;AAC5B,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA,SAAO,QAAQ,yCAAyC,OAAO,MAAM,sBAAsB,YAAY,OAAO,IAAI,QAAQ;AAC1H,QAAM,OAAO,OAAO,IAAI,CAAC,MAAM,EAAE,GAAG;AACpC,QAAM,UAAU,MAAM,OAAO,eAAe,MAAM,EAAE,SAAS,GAAG,CAAC;AAEjE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,MAAI,aAAa;AACjB,MAAI,SAAS;AACb,MAAI,eAAe;AAEnB,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,YAAY,OAAO,CAAC,EAAG;AAC7B,QAAI,CAAC,QAAQ;AACX;AACA,qBAAe,KAAK,MAAM,OAAO,CAAC,EAAG,GAAG;AAAA;AAAA,0BAA0B;AAClE;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,OAAO,aAAa,OAAO,OAAO,cAAc,KAAK;AACvE;AACA,YAAM,WAAW,OAAO,OAAO,WAAW,OAAO,WAAW,QAAQ,OAAO,UAAU;AACrF,qBAAe,KAAK,MAAM,OAAO,GAAG;AAAA;AAAA,2BAA2B,QAAQ,EAAE;AACzE;AAAA,IACF;AAEA;AACA,oBAAgB,OAAO;AAEvB,QAAI;AACJ,QAAI;AACF,YAAM,WAAW,uBAAuB,OAAO,SAAS,OAAO,GAAG;AAClE,YAAM,mBAAmB,SAAS,YAAY,SAAS,UAAU,OAAO;AACxE,gBAAU,gBAAgB,eAAe,gBAAgB;AAAA,IAC3D,QAAQ;AACN,gBAAU,OAAO;AAAA,IACnB;AAEA,iBAAa,KAAK,EAAE,KAAK,OAAO,KAAK,SAAS,OAAO,UAAU,CAAC;AAAA,EAClE;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,aAAa,EAAE;AACvF;AAIA,SAAS,2BAA2B,QAA4B;AAC9D,QAAM,EAAE,MAAM,SAAS,IAAI;AAC3B,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,KAAK,KAAK,EAAE;AAC5B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,OAAO,KAAK,SAAS,eAAU,KAAK,MAAM,wBAAS,KAAK,KAAK,qBAAS,KAAK,YAAY,WAAW;AAC7G,QAAM,KAAK,aAAM,KAAK,GAAG,EAAE;AAC3B,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,MAAM;AACb,UAAM,KAAK,iBAAiB;AAC5B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,KAAK,IAAI;AACpB,UAAM,KAAK,EAAE;AAAA,EACf;AACA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,oBAAoB,SAAS,MAAM,SAAS;AACvD,UAAM,KAAK,EAAE;AACb,eAAW,KAAK,UAAU;AACxB,YAAM,SAAS,KAAK,OAAO,EAAE,KAAK;AAClC,YAAM,KAAK,EAAE,OAAO,cAAc;AAClC,YAAM,QAAQ,EAAE,SAAS,IAAI,IAAI,EAAE,KAAK,KAAK,GAAG,EAAE,KAAK;AACvD,YAAM,KAAK,GAAG,MAAM,SAAS,EAAE,MAAM,KAAK,EAAE,MAAM,KAAK,IAAI;AAC3D,iBAAW,QAAQ,EAAE,KAAK,MAAM,IAAI,GAAG;AACrC,cAAM,KAAK,GAAG,MAAM,KAAK,IAAI,EAAE;AAAA,MACjC;AACA,YAAM,KAAK,EAAE;AAAA,IACf;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAA
I;AACxB;AAEA,eAAe,kBAAkB,QAAmD;AAClF,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA,QAAM,MAAM,SAAS;AACrB,MAAI,CAAC,IAAI,oBAAoB,CAAC,IAAI,sBAAsB;AACtD,UAAMA,kBAAiB,OAAO;AAAA,MAC5B,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,IACpB;AACA,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAAA;AAAA,MACA,SAAS,EAAE,YAAY,GAAG,QAAQ,OAAO,QAAQ,cAAc,EAAE;AAAA,IACnE;AAAA,EACF;AAKA,QAAM,CAAC,YAAY,aAAa,IAAI,OAAO;AAAA,IACzC,CAAC,CAAC,OAAO,IAAI,GAAG,UAAU;AACxB,UAAI,sBAAsB,MAAM,GAAG,EAAG,OAAM,KAAK,KAAK;AAAA,UACjD,MAAK,KAAK,KAAK;AACpB,aAAO,CAAC,OAAO,IAAI;AAAA,IACrB;AAAA,IACA,CAAC,CAAC,GAAG,CAAC,CAAC;AAAA,EACT;AAEA,QAAM,qBAAqB,cAAc;AAAA,IACvC,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,EACpB;AAEA,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAgB;AAAA,MAChB,SAAS,EAAE,YAAY,GAAG,QAAQ,cAAc,QAAQ,cAAc,EAAE;AAAA,IAC1E;AAAA,EACF;AAEA,SAAO,QAAQ,yCAAyC,WAAW,MAAM,uBAAuB,YAAY,MAAM,IAAI,QAAQ;AAC9H,QAAM,SAAS,IAAI,aAAa,IAAI,kBAAkB,IAAI,oBAAoB;AAC9E,QAAM,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG;AACxC,QAAM,cAAc,MAAM,OAAO,cAAc,MAAM,IAAI;AACzD,QAAM,aAAa,IAAI,IAAI,WAAW,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAEtE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC,GAAG,kBAAkB;AACvD,MAAI,aAAa;AACjB,MAAI,SAAS,cAAc;AAE3B,aAAW,CAAC,KAAK,MAAM,KAAK,YAAY,SAAS;AAC/C,UAAM,YAAY,WAAW,IAAI,GAAG,KAAK;AACzC,QAAI,kBAAkB,OAAO;AAC3B;AACA,qBAAe,KAAK,MAAM,GAAG;AAAA;AAAA,8BAA8B,OAAO,OAAO,EAAE;AAC3E;AAAA,IACF;AACA;AACA,iBAAa,KAAK,EAAE,KAAK,SAAS,2BAA2B,MAAM,GAAG,OAAO,UAAU,CAAC;AAAA,EAC1F;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,cAAc,EAAE,EAAE;AAC1F;AAIA,eAAe,oBACb,cACA,qBACA,cACA,UACgF;AAChF,MAAI,YAAY;AAEhB,MAAI,CAAC,gBAAgB,aAAa,WAAW,GAAG;AAC9C,QAAI,CAAC,gBAAgB,aAAa,SAAS,GAAG;AAC5C,aAAO,WAAW,yEAAyE,QAAQ;AACnG,WAAK,SAAS,IAAI,WAAW,iFAAiF;AAAA,IAChH;AACA,WAAO,EAAE,OAAO,cAAc,WAAW,cAAc,EAAE;AAAA,EAC3D;AAEA,SAAO,QAAQ,6CAA6C,aAAa,MAAM,uBAAuB,YAAY,cAAc,IAAI,QAAQ;AAE5I,QAAM,aAAa,MAAM;AAAA,IACvB;AAAA,IACA,OAAO,SAAS;AACd,aAAO,SAAS,kBAAkB,KAAK,GAAG,OAAO,QAAQ;AAEzD,YAAM,YAAY,MAAM;AAAA,QACtB,KAAK;AAAA,QACL,EAAE,SAAS,MAAM,SAAS,qBAAqB,KAAK,KAAK,IAAI;AAAA,QAC7D;AAAA,MACF;AAEA,UAAI,UAAU,WAAW;AACvB,eAAO,EAAE,GAAG,MAAM,SAAS,UAAU,QAAQ;AAAA,MAC/C;AAEA;AACA,aAAO,WAAW,6BAA6B,KAAK,GAAG,KAAK,UAAU,SAAS,gBAAgB,IAAI,QAAQ;AAC3G,WAAK,SAAS,IAAI,WAAW,8BAA8B,KAAK,GAAG,WAAM,UAAU,SAAS,gBAAgB,EAAE;AAC9G,aAAO;AAAA,IACT;AAAA,IACA,YAAY;AAAA,EACd;AAEA,SAAO,EAAE,OAAO,YAAY,WAAW,cAAc,aAAa,OAAO;AAC3E;AAIA,SAAS,uBAAuB,cAAiC,gBAAoC;AACnG,QAAM,SAAS,CAAC,GAAG,YAAY,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AACjE,QAAM,WAAW,CAAC,GAAG,cAAc;AACnC,aAAW,QAAQ,QAAQ;AACzB,QAAI,UAAU,KAAK;AACnB,QAAI;AACF,gBAAU,eAAe,OAAO;AAAA,IAClC,QAAQ;AAAA,IAER;AACA,aAAS,KAAK,MAAM,KAAK,GAAG;AAAA;AAAA,EAAO,OAAO,EAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,SAAS,oBACP,QACA,UACA,SACA,WACA,eACA,eAC2D;AAC3D,QAAM,YAA6C,CAAC;AACpD,MAAI,cAAc,eAAe,GAAG;AAClC,UAAM,KAAK,cAAc,eAAe;AACxC,cAAU,gBAAgB,IAAI,GAAG,EAAE,IAAI,cAAc,YAAY;AACjE,QAAI,CAAC,cAAc,cAAc;AAC/B,gBAAU,YAAY,IAAI;AAAA,IAC5B;AAAA,EACF,WAAW,YAAY,GAAG;AACxB,cAAU,yBAAyB,IAAI;AAAA,EACzC;AAEA,QAAM,cAAc,kBAAkB;AAAA,IACpC,OAAO,oBAAoB,OAAO,KAAK,MAAM;AAAA,IAC7C,YAAY,OAAO,KAAK;AAAA,IACxB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,QAAQ;AAAA,MACN,gBAAgB,QAAQ;AAAA,MACxB,GAAG;AAAA,IACL;AAAA,EACF,CAAC;AAED,QAAM,mBAAmB,cAAc;AAAA,IACrC,OAAO;AAAA,IACP,SAAS;AAAA,IACT,MAAM,SAAS,KAAK,aAAa;AAAA,IACjC,UAAU;AAAA,MACR,kBAAkB,eAAe,aAAa;AAAA,IAChD;AAAA,EACF,CAAC;AAED,QAAM,WAA0C;AAAA,IAC9C,aAAa,OAAO,KAAK;AAAA,IACzB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,mBAAmB;AAAA,IACnB,eAAe,QAAQ;AAAA,EACzB;AACA,SAAO,EAAE,SAAS,kBAAkB,mBAAmB,EAAE,SAAS,
kBAAkB,SAAS,EAAE;AACjG;AAIA,eAAsB,kBACpB,QACA,WAAyB,eACwB;AACjD,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI,CAAC,OAAO,QAAQ,OAAO,KAAK,WAAW,GAAG;AAC5C,WAAO,0BAA0B,WAAW,oBAAoB,SAAS;AAAA,EAC3E;AAEA,QAAM,EAAE,WAAW,cAAc,eAAe,IAAI,cAAc,OAAO,IAAI;AAC7E,QAAM,aAAa,UAAU,SAAS,aAAa;AAEnD,QAAM,SAAS;AAAA,IACb;AAAA,IACA,eAAe,OAAO,KAAK,MAAM,YAAY,UAAU,MAAM,SAAS,aAAa,MAAM,YAAY,eAAe,MAAM;AAAA,EAC5H;AAEA,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,MACL;AAAA,MACA,OAAO,OAAO,KAAK,MAAM;AAAA,MACzB;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA;AAAA,IACE;AAAA,IACA,oBAAoB,UAAU,MAAM,UAAU,aAAa,MAAM;AAAA,IACjE;AAAA,EACF;AACA,QAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAI5D,MAAI,UAAgC;AACpC,MAAI;AACF,QAAI,UAAU,SAAS,GAAG;AACxB,gBAAU,EAAE,QAAQ,IAAI,cAAc,GAAG,cAAc,mBAAmB,EAAE;AAAA,IAC9E,OAAO;AAGL,gBAAU;AAAA,QACR,QAAQ;AAAA,QACR,cAAc,mBAAmB;AAAA,MACnC;AAAA,IACF;AAAA,EACF,SAAS,OAAO;AACd,UAAM,MAAM,cAAc,KAAK;AAC/B,WAAO;AAAA,MACL;AAAA,MACA,iCAAiC,IAAI,OAAO;AAAA,MAC5C;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,sBAAsB,6BAA6B,OAAO,OAAO;AAEvE,QAAM,SAAS,SAAS,IAAI,KAAK,uBAAuB;AAGxD,QAAM,CAAC,UAAU,WAAW,IAAI,MAAM,QAAQ,IAAI;AAAA,IAChD,UAAU,SAAS,IACf,eAAe,WAAW,QAAQ,MAAM,IACxC,QAAQ,QAA2B,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE,CAAC;AAAA,IACvI,kBAAkB,YAAY;AAAA,EAChC,CAAC;AAED,QAAM,eAAe,CAAC,GAAG,SAAS,cAAc,GAAG,YAAY,YAAY;AAC3E,QAAM,gBAAgB,eAAe;AAAA,IACnC,CAAC,EAAE,IAAI,MAAM,MAAM,GAAG;AAAA;AAAA;AAAA,EACxB;AACA,QAAM,iBAAiB,CAAC,GAAG,eAAe,GAAG,SAAS,gBAAgB,GAAG,YAAY,cAAc;AACnG,QAAM,UAAyB;AAAA,IAC7B,YAAY,SAAS,QAAQ,aAAa,YAAY,QAAQ;AAAA,IAC9D,QAAQ,eAAe,SAAS,SAAS,QAAQ,SAAS,YAAY,QAAQ;AAAA,IAC9E,cAAc,SAAS,QAAQ;AAAA,EACjC;AAEA,QAAM,SAAS,IAAI,QAAQ,WAAW,QAAQ,UAAU,aAAa,QAAQ,MAAM,SAAS;AAE5F,MAAI,aAAa,SAAS,GAAG;AAC3B,UAAM,SAAS,SAAS,IAAI,KAAK,2CAA2C;AAAA,EAC9E;AAEA,QAAM,EAAE,OAAO,gBAAgB,WAAW,aAAa,IAAI,MAAM;AAAA,IAC/D;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,EACF;AAEA,QAAM,WAAW,uBAAuB,gBAAgB,cAAc;AACtE,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC;AAAA,IACE;AAAA,IACA,cAAc,QAAQ,UAAU,gBAAgB,QAAQ,MAAM,YAAY,QAAQ,YAAY;AAAA,IAC9F;AAAA,EACF;AAEA,QAAM,eAAe,eAAe,KAAK,YAAY;AACrD,QAAM,SAAS;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,EAAE,cAAc,aAAa;AAAA,EAC/B;AAEA,MAAI,QAAQ,eAAe,KAAK,QAAQ,SAAS,GAAG;AAClD,WAAO,YAAY,OAAO,OAAO;AAAA,EACnC;AAEA,SAAO,YAAY,OAAO,SAAS,OAAO,iBAAiB;AAC7D;AAEO,SAAS,wBAAwB,QAAyB;AAC/D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,UAAU;AAC/B,eAAO,eAAe,YAAY,qBAAqB,UAAU,CAAC,CAAC;AAAA,MACrE;AAEA,YAAM,WAAW,mBAAmB,KAAK,cAAc;AACvD,YAAM,SAAS,MAAM,kBAAkB,MAAM,QAAQ;AAErD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
+ "sourcesContent": ["/**\n * Scrape Links Tool Handler\n *\n * Scrapes many URLs in parallel. Reddit permalinks (reddit.com/r/.../comments/...)\n * are auto-detected and routed through the Reddit API; all other URLs go through\n * the scraper. Both branches feed the same per-URL LLM extraction pipeline.\n *\n * NEVER throws \u2014 every error is returned as a tool-level failure response.\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport {\n SCRAPER,\n CONCURRENCY,\n getCapabilities,\n getMissingEnvMessage,\n parseEnv,\n} from '../config/index.js';\nimport {\n scrapeLinksOutputSchema,\n scrapeLinksParamsSchema,\n type ScrapeLinksParams,\n type ScrapeLinksOutput,\n} from '../schemas/scrape-links.js';\nimport { ScraperClient } from '../clients/scraper.js';\nimport { RedditClient, type PostResult } from '../clients/reddit.js';\nimport { MarkdownCleaner } from '../services/markdown-cleaner.js';\nimport { createLLMProcessor, processContentWithLLM } from '../services/llm-processor.js';\nimport { removeMetaTags } from '../utils/markdown-formatter.js';\nimport { extractReadableContent } from '../utils/content-extractor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { pMap } from '../utils/concurrency.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\nconst markdownCleaner = new MarkdownCleaner();\n\nfunction enhanceExtractionInstruction(instruction: string | undefined): string {\n const base = instruction || 'Extract the main content and key information from this page.';\n return `${SCRAPER.EXTRACTION_PREFIX}\\n\\n${base}\\n\\n${SCRAPER.EXTRACTION_SUFFIX}`;\n}\n\n// --- Types ---\n\ninterface ProcessedResult {\n url: string;\n content: string;\n index: number; // original position in params.urls[]\n}\n\ninterface ScrapeMetrics {\n successful: number;\n failed: number;\n totalCredits: number;\n}\n\ninterface ScrapePhaseResult {\n successItems: ProcessedResult[];\n failedContents: string[];\n metrics: ScrapeMetrics;\n}\n\ninterface BranchInput {\n url: string;\n origIndex: number;\n}\n\ninterface ScrapeClients {\n client: ScraperClient;\n llmProcessor: ReturnType<typeof createLLMProcessor>;\n}\n\n// --- Reddit URL detection ---\n\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+/i;\n\nfunction isRedditUrl(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname);\n } catch {\n return false;\n }\n}\n\nfunction isRedditPostPermalink(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname) && REDDIT_POST_PERMALINK.test(u.pathname);\n } catch {\n return false;\n }\n}\n\n// --- Error helper ---\n\nfunction createScrapeErrorResponse(\n code: string,\n message: string,\n startTime: number,\n retryable = false,\n alternatives?: string[],\n): ToolExecutionResult<ScrapeLinksOutput> {\n return toolFailure(\n `${formatError({\n code,\n message,\n retryable,\n toolName: 'scrape-links',\n howToFix: code === 'NO_URLS' ? 
['Provide at least one valid URL'] : undefined,\n alternatives,\n })}\\n\\nExecution time: ${formatDuration(Date.now() - startTime)}`,\n );\n}\n\n// --- URL partitioning ---\n\ninterface PartitionedUrls {\n webInputs: BranchInput[];\n redditInputs: BranchInput[];\n invalidEntries: { url: string; origIndex: number }[];\n}\n\nfunction partitionUrls(urls: string[]): PartitionedUrls {\n const webInputs: BranchInput[] = [];\n const redditInputs: BranchInput[] = [];\n const invalidEntries: { url: string; origIndex: number }[] = [];\n\n for (let i = 0; i < urls.length; i++) {\n const url = urls[i]!;\n try {\n new URL(url);\n } catch {\n invalidEntries.push({ url, origIndex: i });\n continue;\n }\n if (isRedditUrl(url)) {\n redditInputs.push({ url, origIndex: i });\n } else {\n webInputs.push({ url, origIndex: i });\n }\n }\n\n return { webInputs, redditInputs, invalidEntries };\n}\n\n// --- Web branch ---\n\nasync function fetchWebBranch(\n inputs: BranchInput[],\n client: ScraperClient,\n): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n mcpLog('info', `[concurrency] web branch: fanning out ${inputs.length} URL(s) with limit=${CONCURRENCY.SCRAPER}`, 'scrape');\n const urls = inputs.map((i) => i.url);\n const results = await client.scrapeMultiple(urls, { timeout: 60 });\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n let successful = 0;\n let failed = 0;\n let totalCredits = 0;\n\n for (let i = 0; i < results.length; i++) {\n const result = results[i];\n const origIndex = inputs[i]!.origIndex;\n if (!result) {\n failed++;\n failedContents.push(`## ${inputs[i]!.url}\\n\\n\u274C No result returned`);\n continue;\n }\n\n if (result.error || result.statusCode < 200 || result.statusCode >= 300) {\n failed++;\n const errorMsg = result.error?.message || result.content || `HTTP ${result.statusCode}`;\n failedContents.push(`## ${result.url}\\n\\n\u274C Failed to scrape: ${errorMsg}`);\n continue;\n }\n\n successful++;\n totalCredits += result.credits;\n\n let content: string;\n try {\n const readable = extractReadableContent(result.content, result.url);\n const sourceForCleaner = readable.extracted ? readable.content : result.content;\n content = markdownCleaner.processContent(sourceForCleaner);\n } catch {\n content = result.content;\n }\n\n successItems.push({ url: result.url, content, index: origIndex });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits } };\n}\n\n// --- Reddit branch ---\n\nfunction formatRedditPostAsMarkdown(result: PostResult): string {\n const { post, comments } = result;\n const lines: string[] = [];\n lines.push(`# ${post.title}`);\n lines.push('');\n lines.push(`**r/${post.subreddit}** \u2022 u/${post.author} \u2022 \u2B06\uFE0F ${post.score} \u2022 \uD83D\uDCAC ${post.commentCount} comments`);\n lines.push(`\uD83D\uDD17 ${post.url}`);\n lines.push('');\n if (post.body) {\n lines.push('## Post content');\n lines.push('');\n lines.push(post.body);\n lines.push('');\n }\n if (comments.length > 0) {\n lines.push(`## Top comments (${comments.length} total)`);\n lines.push('');\n for (const c of comments) {\n const indent = ' '.repeat(c.depth);\n const op = c.isOP ? ' **[OP]**' : '';\n const score = c.score >= 0 ? 
`+${c.score}` : `${c.score}`;\n lines.push(`${indent}- **u/${c.author}**${op} _(${score})_`);\n for (const line of c.body.split('\\n')) {\n lines.push(`${indent} ${line}`);\n }\n lines.push('');\n }\n }\n return lines.join('\\n');\n}\n\nasync function fetchRedditBranch(inputs: BranchInput[]): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n const env = parseEnv();\n if (!env.REDDIT_CLIENT_ID || !env.REDDIT_CLIENT_SECRET) {\n const failedContents = inputs.map(\n (i) => `## ${i.url}\\n\\n\u274C Reddit URL detected, but Reddit API is not configured. Set \\`REDDIT_CLIENT_ID\\` and \\`REDDIT_CLIENT_SECRET\\` in the server env to enable threaded Reddit scraping.`,\n );\n return {\n successItems: [],\n failedContents,\n metrics: { successful: 0, failed: inputs.length, totalCredits: 0 },\n };\n }\n\n // Warn for non-permalink Reddit URLs (subreddit homepages, /new, /top, /hot,\n // user profiles). The Reddit API path we call requires /r/.../comments/... \u2014\n // reject upfront so the caller sees a helpful message instead of a 404.\n const [postInputs, nonPermalinks] = inputs.reduce<[BranchInput[], BranchInput[]]>(\n ([posts, rest], input) => {\n if (isRedditPostPermalink(input.url)) posts.push(input);\n else rest.push(input);\n return [posts, rest];\n },\n [[], []],\n );\n\n const nonPermalinkFailed = nonPermalinks.map(\n (i) => `## ${i.url}\\n\\n\u274C Only Reddit post permalinks (/r/<sub>/comments/<id>/...) are supported. Use web-search with scope:\"reddit\" to discover post permalinks first.`,\n );\n\n if (postInputs.length === 0) {\n return {\n successItems: [],\n failedContents: nonPermalinkFailed,\n metrics: { successful: 0, failed: nonPermalinks.length, totalCredits: 0 },\n };\n }\n\n mcpLog('info', `[concurrency] reddit branch: fetching ${postInputs.length} post(s) with limit=${CONCURRENCY.REDDIT}`, 'scrape');\n const client = new RedditClient(env.REDDIT_CLIENT_ID, env.REDDIT_CLIENT_SECRET);\n const urls = postInputs.map((i) => i.url);\n const batchResult = await client.batchGetPosts(urls, true);\n const urlToIndex = new Map(postInputs.map((i) => [i.url, i.origIndex]));\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [...nonPermalinkFailed];\n let successful = 0;\n let failed = nonPermalinks.length;\n\n for (const [url, result] of batchResult.results) {\n const origIndex = urlToIndex.get(url) ?? -1;\n if (result instanceof Error) {\n failed++;\n failedContents.push(`## ${url}\\n\\n\u274C Reddit fetch failed: ${result.message}`);\n continue;\n }\n successful++;\n successItems.push({ url, content: formatRedditPostAsMarkdown(result), index: origIndex });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };\n}\n\n// --- LLM extraction (shared by both branches) ---\n\nasync function processItemsWithLlm(\n successItems: ProcessedResult[],\n enhancedInstruction: string,\n llmProcessor: ReturnType<typeof createLLMProcessor>,\n reporter: ToolReporter,\n): Promise<{ items: ProcessedResult[]; llmErrors: number; llmAttempted: number }> {\n let llmErrors = 0;\n\n if (!llmProcessor || successItems.length === 0) {\n if (!llmProcessor && successItems.length > 0) {\n mcpLog('warning', 'LLM unavailable (LLM_API_KEY not set). 
Returning raw scraped content.', 'scrape');\n void reporter.log('warning', 'llm_extractor_unreachable: planner not configured; raw scraped content returned');\n }\n return { items: successItems, llmErrors, llmAttempted: 0 };\n }\n\n mcpLog('info', `[concurrency] llm extraction: fanning out ${successItems.length} item(s) with limit=${CONCURRENCY.LLM_EXTRACTION}`, 'scrape');\n\n const llmResults = await pMap(\n successItems,\n async (item) => {\n mcpLog('debug', `LLM extracting ${item.url}...`, 'scrape');\n\n const llmResult = await processContentWithLLM(\n item.content,\n { enabled: true, extract: enhancedInstruction, url: item.url },\n llmProcessor,\n );\n\n if (llmResult.processed) {\n return { ...item, content: llmResult.content };\n }\n\n llmErrors++;\n mcpLog('warning', `LLM extraction failed for ${item.url}: ${llmResult.error || 'unknown reason'}`, 'scrape');\n void reporter.log('warning', `llm_extractor_unreachable: ${item.url} \u2014 ${llmResult.error || 'unknown reason'}`);\n return item;\n },\n CONCURRENCY.LLM_EXTRACTION,\n );\n\n return { items: llmResults, llmErrors, llmAttempted: successItems.length };\n}\n\n// --- Output assembly ---\n\nfunction assembleContentEntries(successItems: ProcessedResult[], failedContents: string[]): string[] {\n const sorted = [...successItems].sort((a, b) => a.index - b.index);\n const contents = [...failedContents];\n for (const item of sorted) {\n let content = item.content;\n try {\n content = removeMetaTags(content);\n } catch {\n // Use content as-is\n }\n contents.push(`## ${item.url}\\n\\n${content}`);\n }\n return contents;\n}\n\nfunction buildScrapeResponse(\n params: ScrapeLinksParams,\n contents: string[],\n metrics: ScrapeMetrics,\n llmErrors: number,\n executionTime: number,\n llmAccounting: { llmAttempted: number; llmSucceeded: boolean },\n): { content: string; structuredContent: ScrapeLinksOutput } {\n const llmExtras: Record<string, string | number> = {};\n if (llmAccounting.llmAttempted > 0) {\n const ok = llmAccounting.llmAttempted - llmErrors;\n llmExtras['LLM extraction'] = `${ok}/${llmAccounting.llmAttempted} succeeded`;\n if (!llmAccounting.llmSucceeded) {\n llmExtras['LLM credit'] = '0 charged (no extraction produced)';\n }\n } else if (llmErrors > 0) {\n llmExtras['LLM extraction failures'] = llmErrors;\n }\n\n const batchHeader = formatBatchHeader({\n title: `Scraped Content (${params.urls.length} URLs)`,\n totalItems: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n extras: {\n 'Credits used': metrics.totalCredits,\n ...llmExtras,\n },\n });\n\n const formattedContent = formatSuccess({\n title: 'Scraping Complete',\n summary: batchHeader,\n data: contents.join('\\n\\n---\\n\\n'),\n metadata: {\n 'Execution time': formatDuration(executionTime),\n },\n });\n\n const metadata: ScrapeLinksOutput['metadata'] = {\n total_items: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n execution_time_ms: executionTime,\n total_credits: metrics.totalCredits,\n };\n return { content: formattedContent, structuredContent: { metadata } };\n}\n\n// --- Handler ---\n\nexport async function handleScrapeLinks(\n params: ScrapeLinksParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<ScrapeLinksOutput>> {\n const startTime = Date.now();\n\n if (!params.urls || params.urls.length === 0) {\n return createScrapeErrorResponse('NO_URLS', 'No URLs provided', startTime);\n }\n\n const { webInputs, redditInputs, invalidEntries } = partitionUrls(params.urls);\n 
const validCount = webInputs.length + redditInputs.length;\n\n await reporter.log(\n 'info',\n `Partitioned ${params.urls.length} URL(s): ${webInputs.length} web, ${redditInputs.length} reddit, ${invalidEntries.length} invalid`,\n );\n\n if (validCount === 0) {\n return createScrapeErrorResponse(\n 'INVALID_URLS',\n `All ${params.urls.length} URLs are invalid`,\n startTime,\n false,\n [\n 'web-search(queries=[...], extract=\"...\") \u2014 search for valid URLs first, then scrape the results',\n ],\n );\n }\n\n mcpLog(\n 'info',\n `Starting scrape: ${webInputs.length} web + ${redditInputs.length} reddit URL(s)`,\n 'scrape',\n );\n await reporter.progress(15, 100, 'Preparing scraper clients');\n\n // Only initialize web clients if we actually have web URLs. Reddit-only\n // batches run without touching the scraper.\n let clients: ScrapeClients | null = null;\n try {\n if (webInputs.length > 0) {\n clients = { client: new ScraperClient(), llmProcessor: createLLMProcessor() };\n } else {\n // Reddit-only: no scraper needed, but still create the LLM processor\n // so the extraction pass runs.\n clients = {\n client: null as unknown as ScraperClient,\n llmProcessor: createLLMProcessor(),\n };\n }\n } catch (error) {\n const err = classifyError(error);\n return createScrapeErrorResponse(\n 'CLIENT_INIT_FAILED',\n `Failed to initialize scraper: ${err.message}`,\n startTime,\n false,\n [\n 'web-search(queries=[\"topic key findings\", \"topic summary\"], extract=\"key findings and summary\") \u2014 search instead of scraping',\n ],\n );\n }\n\n const enhancedInstruction = enhanceExtractionInstruction(params.extract);\n\n await reporter.progress(35, 100, 'Fetching page content');\n\n // Run both branches in parallel. Failures in one branch do not block the other.\n const [webPhase, redditPhase] = await Promise.all([\n webInputs.length > 0\n ? 
fetchWebBranch(webInputs, clients.client)\n : Promise.resolve<ScrapePhaseResult>({ successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } }),\n fetchRedditBranch(redditInputs),\n ]);\n\n const successItems = [...webPhase.successItems, ...redditPhase.successItems];\n const invalidFailed = invalidEntries.map(\n ({ url }) => `## ${url}\\n\\n\u274C Invalid URL format`,\n );\n const failedContents = [...invalidFailed, ...webPhase.failedContents, ...redditPhase.failedContents];\n const metrics: ScrapeMetrics = {\n successful: webPhase.metrics.successful + redditPhase.metrics.successful,\n failed: invalidEntries.length + webPhase.metrics.failed + redditPhase.metrics.failed,\n totalCredits: webPhase.metrics.totalCredits,\n };\n\n await reporter.log('info', `Fetched ${metrics.successful} page(s), ${metrics.failed} failed`);\n\n if (successItems.length > 0) {\n await reporter.progress(80, 100, 'Running LLM extraction over fetched pages');\n }\n\n const { items: processedItems, llmErrors, llmAttempted } = await processItemsWithLlm(\n successItems,\n enhancedInstruction,\n clients.llmProcessor,\n reporter,\n );\n\n const contents = assembleContentEntries(processedItems, failedContents);\n const executionTime = Date.now() - startTime;\n\n mcpLog(\n 'info',\n `Completed: ${metrics.successful} successful, ${metrics.failed} failed, ${metrics.totalCredits} credits used`,\n 'scrape',\n );\n\n const llmSucceeded = llmAttempted > 0 && llmErrors < llmAttempted;\n const result = buildScrapeResponse(\n params,\n contents,\n metrics,\n llmErrors,\n executionTime,\n { llmAttempted, llmSucceeded },\n );\n\n if (metrics.successful === 0 && metrics.failed > 0) {\n return toolFailure(result.content);\n }\n\n return toolSuccess(result.content, result.structuredContent);\n}\n\nexport function registerScrapeLinksTool(server: MCPServer): void {\n server.tool(\n {\n name: 'scrape-links',\n title: 'Scrape Links',\n description:\n 'Fetch many URLs in parallel and run per-URL structured LLM extraction. Auto-detects reddit.com post permalinks and routes them through the Reddit API (threaded post + comments); everything else flows through the HTTP scraper. Safe to call in parallel \u2014 group URLs by context rather than jamming unrelated batches together. Each page returns `## Source`, `## Matches` (verbatim-preserved facts), `## Not found` (explicit gaps), and `## Follow-up signals` (new terms + referenced URLs) that feed the next research loop. Describe the SHAPE of what you want in `extract`, facets separated by `|` (e.g. `root cause | affected versions | fix | workarounds | timeline`).',\n schema: scrapeLinksParamsSchema,\n outputSchema: scrapeLinksOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().scraping) {\n return toToolResponse(toolFailure(getMissingEnvMessage('scraping')));\n }\n\n const reporter = createToolReporter(ctx, 'scrape-links');\n const result = await handleScrapeLinks(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Scrape failed' : 'Scrape complete');\n return toToolResponse(result);\n },\n );\n}\n"],
+ "mappings": "AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,qBAAqB;AAC9B,SAAS,oBAAqC;AAC9C,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,6BAA6B;AAC1D,SAAS,sBAAsB;AAC/B,SAAS,8BAA8B;AACvC,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACrB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAEP,MAAM,kBAAkB,IAAI,gBAAgB;AAE5C,SAAS,6BAA6B,aAAyC;AAC7E,QAAM,OAAO,eAAe;AAC5B,SAAO,GAAG,QAAQ,iBAAiB;AAAA;AAAA,EAAO,IAAI;AAAA;AAAA,EAAO,QAAQ,iBAAiB;AAChF;AAkCA,MAAM,cAAc;AACpB,MAAM,wBAAwB;AAE9B,SAAS,YAAY,KAAsB;AACzC,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ;AAAA,EACpC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,sBAAsB,KAAsB;AACnD,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ,KAAK,sBAAsB,KAAK,EAAE,QAAQ;AAAA,EAC9E,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAIA,SAAS,0BACP,MACA,SACA,WACA,YAAY,OACZ,cACwC;AACxC,SAAO;AAAA,IACL,GAAG,YAAY;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,UAAU,SAAS,YAAY,CAAC,gCAAgC,IAAI;AAAA,MACpE;AAAA,IACF,CAAC,CAAC;AAAA;AAAA,kBAAuB,eAAe,KAAK,IAAI,IAAI,SAAS,CAAC;AAAA,EACjE;AACF;AAUA,SAAS,cAAc,MAAiC;AACtD,QAAM,YAA2B,CAAC;AAClC,QAAM,eAA8B,CAAC;AACrC,QAAM,iBAAuD,CAAC;AAE9D,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,MAAM,KAAK,CAAC;AAClB,QAAI;AACF,UAAI,IAAI,GAAG;AAAA,IACb,QAAQ;AACN,qBAAe,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AACzC;AAAA,IACF;AACA,QAAI,YAAY,GAAG,GAAG;AACpB,mBAAa,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACzC,OAAO;AACL,gBAAU,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,cAAc,eAAe;AACnD;AAIA,eAAe,eACb,QACA,QAC4B;AAC5B,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA,SAAO,QAAQ,yCAAyC,OAAO,MAAM,sBAAsB,YAAY,OAAO,IAAI,QAAQ;AAC1H,QAAM,OAAO,OAAO,IAAI,CAAC,MAAM,EAAE,GAAG;AACpC,QAAM,UAAU,MAAM,OAAO,eAAe,MAAM,EAAE,SAAS,GAAG,CAAC;AAEjE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,MAAI,aAAa;AACjB,MAAI,SAAS;AACb,MAAI,eAAe;AAEnB,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,YAAY,OAAO,CAAC,EAAG;AAC7B,QAAI,CAAC,QAAQ;AACX;AACA,qBAAe,KAAK,MAAM,OAAO,CAAC,EAAG,GAAG;AAAA;AAAA,0BAA0B;AAClE;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,OAAO,aAAa,OAAO,OAAO,cAAc,KAAK;AACvE;AACA,YAAM,WAAW,OAAO,OAAO,WAAW,OAAO,WAAW,QAAQ,OAAO,UAAU;AACrF,qBAAe,KAAK,MAAM,OAAO,GAAG;AAAA;AAAA,2BAA2B,QAAQ,EAAE;AACzE;AAAA,IACF;AAEA;AACA,oBAAgB,OAAO;AAEvB,QAAI;AACJ,QAAI;AACF,YAAM,WAAW,uBAAuB,OAAO,SAAS,OAAO,GAAG;AAClE,YAAM,mBAAmB,SAAS,YAAY,SAAS,UAAU,OAAO;AACxE,gBAAU,gBAAgB,eAAe,gBAAgB;AAAA,IAC3D,QAAQ;AACN,gBAAU,OAAO;AAAA,IACnB;AAEA,iBAAa,KAAK,EAAE,KAAK,OAAO,KAAK,SAAS,OAAO,UAAU,CAAC;AAAA,EAClE;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,aAAa,EAAE;AACvF;AAIA,SAAS,2BAA2B,QAA4B;AAC9D,QAAM,EAAE,MAAM,SAAS,IAAI;AAC3B,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,KAAK,KAAK,EAAE;AAC5B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,OAAO,KAAK,SAAS,eAAU,KAAK,MAAM,wBAAS,KAAK,KAAK,qBAAS,KAAK,YAAY,WAAW;AAC7G,QAAM,KAAK,aAAM,KAAK,GAAG,EAAE;AAC3B,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,MAAM;AACb,UAAM,KAAK,iBAAiB;AAC5B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,KAAK,IAAI;AACpB,UAAM,KAAK,EAAE;AAAA,EACf;AACA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,oBAAoB,SAAS,MAAM,SAAS;AACvD,UAAM,KAAK,EAAE;AACb,eAAW,KAAK,UAAU;AACxB,YAAM,SAAS,KAAK,OAAO,EAAE,KAAK;AAClC,YAAM,KAAK,EAAE,OAAO,cAAc;AAClC,YAAM,QAAQ,EAAE,SAAS,IAAI,IAAI,EAAE,KAAK,KAAK,GAAG,EAAE,KAAK;AACvD,YAAM,KAAK,GAAG,MAAM,SAAS,EAAE,MAAM,KAAK,EAAE,MAAM,KAAK,IAAI;AAC3D,iBAAW,QAAQ,EAAE,KAAK,MAAM,IAAI,GAAG;AACrC,cAAM,KAAK,GAAG,MAAM,KAAK,IAAI,EAAE;AAAA,MACjC;AACA,YAAM,KAAK,EAAE;AAAA,IACf;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAA
I;AACxB;AAEA,eAAe,kBAAkB,QAAmD;AAClF,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA,QAAM,MAAM,SAAS;AACrB,MAAI,CAAC,IAAI,oBAAoB,CAAC,IAAI,sBAAsB;AACtD,UAAMA,kBAAiB,OAAO;AAAA,MAC5B,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,IACpB;AACA,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAAA;AAAA,MACA,SAAS,EAAE,YAAY,GAAG,QAAQ,OAAO,QAAQ,cAAc,EAAE;AAAA,IACnE;AAAA,EACF;AAKA,QAAM,CAAC,YAAY,aAAa,IAAI,OAAO;AAAA,IACzC,CAAC,CAAC,OAAO,IAAI,GAAG,UAAU;AACxB,UAAI,sBAAsB,MAAM,GAAG,EAAG,OAAM,KAAK,KAAK;AAAA,UACjD,MAAK,KAAK,KAAK;AACpB,aAAO,CAAC,OAAO,IAAI;AAAA,IACrB;AAAA,IACA,CAAC,CAAC,GAAG,CAAC,CAAC;AAAA,EACT;AAEA,QAAM,qBAAqB,cAAc;AAAA,IACvC,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,EACpB;AAEA,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAgB;AAAA,MAChB,SAAS,EAAE,YAAY,GAAG,QAAQ,cAAc,QAAQ,cAAc,EAAE;AAAA,IAC1E;AAAA,EACF;AAEA,SAAO,QAAQ,yCAAyC,WAAW,MAAM,uBAAuB,YAAY,MAAM,IAAI,QAAQ;AAC9H,QAAM,SAAS,IAAI,aAAa,IAAI,kBAAkB,IAAI,oBAAoB;AAC9E,QAAM,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG;AACxC,QAAM,cAAc,MAAM,OAAO,cAAc,MAAM,IAAI;AACzD,QAAM,aAAa,IAAI,IAAI,WAAW,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAEtE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC,GAAG,kBAAkB;AACvD,MAAI,aAAa;AACjB,MAAI,SAAS,cAAc;AAE3B,aAAW,CAAC,KAAK,MAAM,KAAK,YAAY,SAAS;AAC/C,UAAM,YAAY,WAAW,IAAI,GAAG,KAAK;AACzC,QAAI,kBAAkB,OAAO;AAC3B;AACA,qBAAe,KAAK,MAAM,GAAG;AAAA;AAAA,8BAA8B,OAAO,OAAO,EAAE;AAC3E;AAAA,IACF;AACA;AACA,iBAAa,KAAK,EAAE,KAAK,SAAS,2BAA2B,MAAM,GAAG,OAAO,UAAU,CAAC;AAAA,EAC1F;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,cAAc,EAAE,EAAE;AAC1F;AAIA,eAAe,oBACb,cACA,qBACA,cACA,UACgF;AAChF,MAAI,YAAY;AAEhB,MAAI,CAAC,gBAAgB,aAAa,WAAW,GAAG;AAC9C,QAAI,CAAC,gBAAgB,aAAa,SAAS,GAAG;AAC5C,aAAO,WAAW,yEAAyE,QAAQ;AACnG,WAAK,SAAS,IAAI,WAAW,iFAAiF;AAAA,IAChH;AACA,WAAO,EAAE,OAAO,cAAc,WAAW,cAAc,EAAE;AAAA,EAC3D;AAEA,SAAO,QAAQ,6CAA6C,aAAa,MAAM,uBAAuB,YAAY,cAAc,IAAI,QAAQ;AAE5I,QAAM,aAAa,MAAM;AAAA,IACvB;AAAA,IACA,OAAO,SAAS;AACd,aAAO,SAAS,kBAAkB,KAAK,GAAG,OAAO,QAAQ;AAEzD,YAAM,YAAY,MAAM;AAAA,QACtB,KAAK;AAAA,QACL,EAAE,SAAS,MAAM,SAAS,qBAAqB,KAAK,KAAK,IAAI;AAAA,QAC7D;AAAA,MACF;AAEA,UAAI,UAAU,WAAW;AACvB,eAAO,EAAE,GAAG,MAAM,SAAS,UAAU,QAAQ;AAAA,MAC/C;AAEA;AACA,aAAO,WAAW,6BAA6B,KAAK,GAAG,KAAK,UAAU,SAAS,gBAAgB,IAAI,QAAQ;AAC3G,WAAK,SAAS,IAAI,WAAW,8BAA8B,KAAK,GAAG,WAAM,UAAU,SAAS,gBAAgB,EAAE;AAC9G,aAAO;AAAA,IACT;AAAA,IACA,YAAY;AAAA,EACd;AAEA,SAAO,EAAE,OAAO,YAAY,WAAW,cAAc,aAAa,OAAO;AAC3E;AAIA,SAAS,uBAAuB,cAAiC,gBAAoC;AACnG,QAAM,SAAS,CAAC,GAAG,YAAY,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AACjE,QAAM,WAAW,CAAC,GAAG,cAAc;AACnC,aAAW,QAAQ,QAAQ;AACzB,QAAI,UAAU,KAAK;AACnB,QAAI;AACF,gBAAU,eAAe,OAAO;AAAA,IAClC,QAAQ;AAAA,IAER;AACA,aAAS,KAAK,MAAM,KAAK,GAAG;AAAA;AAAA,EAAO,OAAO,EAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,SAAS,oBACP,QACA,UACA,SACA,WACA,eACA,eAC2D;AAC3D,QAAM,YAA6C,CAAC;AACpD,MAAI,cAAc,eAAe,GAAG;AAClC,UAAM,KAAK,cAAc,eAAe;AACxC,cAAU,gBAAgB,IAAI,GAAG,EAAE,IAAI,cAAc,YAAY;AACjE,QAAI,CAAC,cAAc,cAAc;AAC/B,gBAAU,YAAY,IAAI;AAAA,IAC5B;AAAA,EACF,WAAW,YAAY,GAAG;AACxB,cAAU,yBAAyB,IAAI;AAAA,EACzC;AAEA,QAAM,cAAc,kBAAkB;AAAA,IACpC,OAAO,oBAAoB,OAAO,KAAK,MAAM;AAAA,IAC7C,YAAY,OAAO,KAAK;AAAA,IACxB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,QAAQ;AAAA,MACN,gBAAgB,QAAQ;AAAA,MACxB,GAAG;AAAA,IACL;AAAA,EACF,CAAC;AAED,QAAM,mBAAmB,cAAc;AAAA,IACrC,OAAO;AAAA,IACP,SAAS;AAAA,IACT,MAAM,SAAS,KAAK,aAAa;AAAA,IACjC,UAAU;AAAA,MACR,kBAAkB,eAAe,aAAa;AAAA,IAChD;AAAA,EACF,CAAC;AAED,QAAM,WAA0C;AAAA,IAC9C,aAAa,OAAO,KAAK;AAAA,IACzB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,mBAAmB;AAAA,IACnB,eAAe,QAAQ;AAAA,EACzB;AACA,SAAO,EAAE,SAAS,kBAAkB,mBAAmB,EAAE,SAAS,
EAAE;AACtE;AAIA,eAAsB,kBACpB,QACA,WAAyB,eACwB;AACjD,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI,CAAC,OAAO,QAAQ,OAAO,KAAK,WAAW,GAAG;AAC5C,WAAO,0BAA0B,WAAW,oBAAoB,SAAS;AAAA,EAC3E;AAEA,QAAM,EAAE,WAAW,cAAc,eAAe,IAAI,cAAc,OAAO,IAAI;AAC7E,QAAM,aAAa,UAAU,SAAS,aAAa;AAEnD,QAAM,SAAS;AAAA,IACb;AAAA,IACA,eAAe,OAAO,KAAK,MAAM,YAAY,UAAU,MAAM,SAAS,aAAa,MAAM,YAAY,eAAe,MAAM;AAAA,EAC5H;AAEA,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,MACL;AAAA,MACA,OAAO,OAAO,KAAK,MAAM;AAAA,MACzB;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA;AAAA,IACE;AAAA,IACA,oBAAoB,UAAU,MAAM,UAAU,aAAa,MAAM;AAAA,IACjE;AAAA,EACF;AACA,QAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAI5D,MAAI,UAAgC;AACpC,MAAI;AACF,QAAI,UAAU,SAAS,GAAG;AACxB,gBAAU,EAAE,QAAQ,IAAI,cAAc,GAAG,cAAc,mBAAmB,EAAE;AAAA,IAC9E,OAAO;AAGL,gBAAU;AAAA,QACR,QAAQ;AAAA,QACR,cAAc,mBAAmB;AAAA,MACnC;AAAA,IACF;AAAA,EACF,SAAS,OAAO;AACd,UAAM,MAAM,cAAc,KAAK;AAC/B,WAAO;AAAA,MACL;AAAA,MACA,iCAAiC,IAAI,OAAO;AAAA,MAC5C;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,sBAAsB,6BAA6B,OAAO,OAAO;AAEvE,QAAM,SAAS,SAAS,IAAI,KAAK,uBAAuB;AAGxD,QAAM,CAAC,UAAU,WAAW,IAAI,MAAM,QAAQ,IAAI;AAAA,IAChD,UAAU,SAAS,IACf,eAAe,WAAW,QAAQ,MAAM,IACxC,QAAQ,QAA2B,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE,CAAC;AAAA,IACvI,kBAAkB,YAAY;AAAA,EAChC,CAAC;AAED,QAAM,eAAe,CAAC,GAAG,SAAS,cAAc,GAAG,YAAY,YAAY;AAC3E,QAAM,gBAAgB,eAAe;AAAA,IACnC,CAAC,EAAE,IAAI,MAAM,MAAM,GAAG;AAAA;AAAA;AAAA,EACxB;AACA,QAAM,iBAAiB,CAAC,GAAG,eAAe,GAAG,SAAS,gBAAgB,GAAG,YAAY,cAAc;AACnG,QAAM,UAAyB;AAAA,IAC7B,YAAY,SAAS,QAAQ,aAAa,YAAY,QAAQ;AAAA,IAC9D,QAAQ,eAAe,SAAS,SAAS,QAAQ,SAAS,YAAY,QAAQ;AAAA,IAC9E,cAAc,SAAS,QAAQ;AAAA,EACjC;AAEA,QAAM,SAAS,IAAI,QAAQ,WAAW,QAAQ,UAAU,aAAa,QAAQ,MAAM,SAAS;AAE5F,MAAI,aAAa,SAAS,GAAG;AAC3B,UAAM,SAAS,SAAS,IAAI,KAAK,2CAA2C;AAAA,EAC9E;AAEA,QAAM,EAAE,OAAO,gBAAgB,WAAW,aAAa,IAAI,MAAM;AAAA,IAC/D;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,EACF;AAEA,QAAM,WAAW,uBAAuB,gBAAgB,cAAc;AACtE,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC;AAAA,IACE;AAAA,IACA,cAAc,QAAQ,UAAU,gBAAgB,QAAQ,MAAM,YAAY,QAAQ,YAAY;AAAA,IAC9F;AAAA,EACF;AAEA,QAAM,eAAe,eAAe,KAAK,YAAY;AACrD,QAAM,SAAS;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,EAAE,cAAc,aAAa;AAAA,EAC/B;AAEA,MAAI,QAAQ,eAAe,KAAK,QAAQ,SAAS,GAAG;AAClD,WAAO,YAAY,OAAO,OAAO;AAAA,EACnC;AAEA,SAAO,YAAY,OAAO,SAAS,OAAO,iBAAiB;AAC7D;AAEO,SAAS,wBAAwB,QAAyB;AAC/D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,UAAU;AAC/B,eAAO,eAAe,YAAY,qBAAqB,UAAU,CAAC,CAAC;AAAA,MACrE;AAEA,YAAM,WAAW,mBAAmB,KAAK,cAAc;AACvD,YAAM,SAAS,MAAM,kBAAkB,MAAM,QAAQ;AAErD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
  "names": ["failedContents"]
  }
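The only functional change buried in the scrape-links source above is in buildScrapeResponse: 6.0.1 returned structuredContent as { content: formattedContent, metadata }, while 6.0.3 returns { metadata } alone. Since toolSuccess(result.content, result.structuredContent) already ships the markdown as the response's text content, the old shape sent every scraped page twice. A rough sketch of the shape change; the field names come from the embedded source, but the type declarations are illustrative, not package exports:

// Metadata fields mirror the embedded source; these named types are
// assumptions for this sketch, not declarations the package exports.
interface ScrapeMetadata {
  total_items: number;
  successful: number;
  failed: number;
  execution_time_ms: number;
  total_credits: number;
}

// 6.0.1: the full markdown was duplicated into the structured payload.
type StructuredBefore = { content: string; metadata: ScrapeMetadata };

// 6.0.3: metadata only; the markdown still arrives as the text content.
type StructuredAfter = { metadata: ScrapeMetadata };

The hunks that follow belong to a second compiled file, the web-search tool module (the source map at the end of this diff points at src/tools/search.ts); the same old/new line format applies.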
@@ -115,6 +115,41 @@ function appendSignalsAndFollowUps(markdown, signalsSection, refineQueries, opti
  }
  return sections.join("\n");
  }
+ const MIN_START_HERE = 3;
+ const MAX_START_HERE = 5;
+ function buildStartHereSection(tiers, entryByRank, opts = {}) {
+ const min = opts.min ?? MIN_START_HERE;
+ const max = opts.max ?? MAX_START_HERE;
+ const picks = [];
+ for (const candidate of tiers.high) {
+ if (picks.length >= max) break;
+ picks.push({ candidate, tier: "HIGHLY_RELEVANT" });
+ }
+ if (picks.length < min) {
+ const target = Math.min(min, max);
+ for (const candidate of tiers.maybe) {
+ if (picks.length >= target) break;
+ picks.push({ candidate, tier: "MAYBE_RELEVANT" });
+ }
+ }
+ if (picks.length === 0) return "";
+ const lines = [];
+ lines.push("## Start here \u2014 best candidates for your extract");
+ picks.forEach((pick, i) => {
+ const entry = entryByRank.get(pick.candidate.rank);
+ const reason = entry?.reason && entry.reason.trim().length > 0 ? entry.reason : "\u2014";
+ let domain;
+ try {
+ domain = new URL(pick.candidate.url).hostname.replace(/^www\./, "");
+ } catch {
+ domain = pick.candidate.url;
+ }
+ lines.push(
+ `${i + 1}. **[${pick.candidate.title}](${pick.candidate.url})** \u2014 ${domain} \u2014 ${reason} *(${pick.tier}, rank ${pick.candidate.rank})*`
+ );
+ });
+ return lines.join("\n");
+ }
  function buildClassifiedOutput(classification, aggregation, extract, searches, totalQueries, verbose = false) {
  const rankedUrls = aggregation.rankedUrls;
  const entryByRank = new Map(classification.results.map((r) => [r.rank, r]));
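The new buildStartHereSection helper fills a shortlist from the HIGHLY_RELEVANT tier first, capped at MAX_START_HERE (5), and backfills from MAYBE_RELEVANT only while the list is still below MIN_START_HERE (3); when nothing qualifies it returns an empty string so the section disappears entirely. A minimal sketch of driving it, with the candidate and entry shapes inferred from the fields the compiled code reads (rank, url, title, reason) rather than taken from the package's declarations:

// Inferred shapes, assumed for illustration only.
interface Candidate { rank: number; url: string; title: string }
interface ClassifierEntry { rank: number; reason?: string }

declare function buildStartHereSection(
  tiers: { high: Candidate[]; maybe: Candidate[] },
  entryByRank: Map<number, ClassifierEntry>,
  opts?: { min?: number; max?: number },
): string;

const high: Candidate[] = [
  { rank: 1, url: 'https://example.com/root-cause', title: 'Root cause analysis' },
];
const maybe: Candidate[] = [
  { rank: 4, url: 'https://example.com/background', title: 'Background thread' },
];
const entryByRank = new Map<number, ClassifierEntry>([
  [1, { rank: 1, reason: 'Names the exact failing version' }],
]);

// One high-tier pick is under the minimum of 3, so the maybe tier backfills
// (only one candidate exists here, so the list ends at two entries); an
// entry with no reason renders as "\u2014" in the emitted markdown list.
const section = buildStartHereSection({ high, maybe }, entryByRank);

Note that backfilling stops at the minimum rather than the maximum, so weaker MAYBE_RELEVANT picks never push the shortlist past three entries.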
@@ -143,6 +178,14 @@ function buildClassifiedOutput(classification, aggregation, extract, searches, t
  lines.push(`> Confidence: \`${classification.confidence}\`${confReason}`);
  }
  lines.push("");
+ const startHere = buildStartHereSection(
+ { high: tiers.high, maybe: tiers.maybe },
+ entryByRank
+ );
+ if (startHere) {
+ lines.push(startHere);
+ lines.push("");
+ }
  lines.push(`**Summary:** ${classification.synthesis}`);
  lines.push("");
  const renderRichRow = (url) => {
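The wiring above drops the shortlist between the confidence line and the Summary line, and the if (startHere) guard keeps outputs with no classified candidates identical to 6.0.1. Reconstructed from the template literal in the previous hunk, a one-entry section would contribute roughly this markdown (values invented, reusing the earlier sketch):

// Approximate rendered output; the \u2014 escapes match the compiled template.
const renderedSection = [
  '## Start here \u2014 best candidates for your extract',
  '1. **[Root cause analysis](https://example.com/root-cause)** \u2014 example.com \u2014 Names the exact failing version *(HIGHLY_RELEVANT, rank 1)*',
].join('\n');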
@@ -363,7 +406,7 @@ async function handleWebSearch(params, reporter = NOOP_REPORTER) {
  ---
  *${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs${llmClassified ? " | LLM classified" : ""}*`;
  const fullMarkdown = markdown + footer;
- return toolSuccess(fullMarkdown, { content: fullMarkdown, results, metadata });
+ return toolSuccess(fullMarkdown, { results, metadata });
  } catch (error) {
  return buildWebSearchError(error, params, startTime);
  }
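web-search gets the same payload slimming as scrape-links: 6.0.1 copied the full markdown into structuredContent.content alongside results and metadata, and 6.0.3 drops the copy. A hedged sketch of why that is safe for callers, using the general MCP tool-result layout of text blocks plus optional structured content (the reader function is illustrative):

// An MCP tool result carries human-readable text blocks and, optionally,
// machine-readable structured content side by side.
interface ToolResult {
  content: Array<{ type: 'text'; text: string }>;
  structuredContent?: { results?: unknown[]; metadata?: Record<string, unknown> };
}

function readMarkdown(result: ToolResult): string {
  // Clients that read structuredContent.content on 6.0.1 should switch to
  // the text block; the markdown itself is unchanged between versions.
  return result.content.map((block) => block.text).join('\n');
}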
@@ -396,6 +439,7 @@ function registerWebSearchTool(server) {
  }
  export {
  appendSignalsAndFollowUps,
+ buildStartHereSection,
  buildSuggestedFollowUpsSection,
  handleWebSearch,
  registerWebSearchTool
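buildStartHereSection also joins the module's exports, which makes the tier-selection rules testable without running a full search. A sketch of a direct check against the empty-input contract; the import specifier is a guess at the published layout, not something this diff states:

// Import path assumed for illustration only.
import { buildStartHereSection } from 'mcp-researchpowerpack/dist/src/tools/search.js';

const empty = buildStartHereSection({ high: [], maybe: [] }, new Map());
console.assert(empty === '', 'no candidates should produce no section');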
@@ -1,7 +1,7 @@
  {
  "version": 3,
  "sources": ["../../../src/tools/search.ts"],
- "sourcesContent": ["/**\n * Web Search Tool Handler\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n webSearchOutputSchema,\n webSearchParamsSchema,\n type WebSearchParams,\n type WebSearchOutput,\n} from '../schemas/web-search.js';\nimport { SearchClient } from '../clients/search.js';\nimport {\n aggregateAndRank,\n generateUnifiedOutput,\n} from '../utils/url-aggregator.js';\nimport {\n createLLMProcessor,\n classifySearchResults,\n suggestRefineQueriesForRawMode,\n type ClassificationResult,\n type RefineQuerySuggestion,\n} from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { classifySourceByUrl } from '../utils/source-type.js';\nimport {\n mcpLog,\n formatError,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\nimport { sanitizeSuggestion } from '../utils/sanitize.js';\n\n// --- Internal types ---\n\ninterface SearchAggregation {\n readonly rankedUrls: ReturnType<typeof aggregateAndRank>['rankedUrls'];\n readonly totalUniqueUrls: number;\n readonly frequencyThreshold: number;\n readonly thresholdNote?: string;\n}\n\ninterface SearchResponse {\n searches: Parameters<typeof aggregateAndRank>[0];\n totalQueries: number;\n}\n\n// --- Helpers ---\n\n/** Reddit post permalink: /r/{sub}/comments/{id}/ \u2014 drops subreddit\n * homepages, /rising, /new, /top, etc. so only post URLs reach the agent.\n * See mcp-revisions/tool-surface/02-extend-web-search-with-reddit-scope.md. */\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+\\//i;\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\n\nfunction decorateQueriesForScope(queries: string[], scope: 'web' | 'reddit' | 'both'): string[] {\n if (scope === 'web') return queries;\n const reddited = queries.map((q) =>\n /\\bsite:reddit\\.com\\b/i.test(q) ? q : `${q} site:reddit.com`,\n );\n return scope === 'reddit' ? 
reddited : [...queries, ...reddited];\n}\n\nasync function executeSearches(queries: string[]): Promise<SearchResponse> {\n const client = new SearchClient();\n return client.searchMultiple(queries);\n}\n\nfunction filterScopedSearches(\n response: SearchResponse,\n scope: 'web' | 'reddit' | 'both',\n): SearchResponse {\n if (scope === 'web') return response;\n const filtered = response.searches.map((search) => ({\n ...search,\n results: search.results.filter((r) => {\n let host: string;\n try { host = new URL(r.link).hostname; } catch { return true; }\n // Non-reddit URLs pass through; reddit URLs must be post permalinks.\n if (!REDDIT_HOST.test(host)) return scope !== 'reddit';\n return REDDIT_POST_PERMALINK.test(r.link);\n }),\n }));\n return { ...response, searches: filtered };\n}\n\nfunction processResults(response: SearchResponse): {\n aggregation: SearchAggregation;\n} {\n const aggregation = aggregateAndRank(response.searches, 5);\n return { aggregation };\n}\n\n// --- Raw output (traditional unified ranked list) ---\n\nfunction buildRawOutput(\n queries: string[],\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n verbose: boolean = false,\n): string {\n return generateUnifiedOutput(\n aggregation.rankedUrls, queries, searches,\n aggregation.totalUniqueUrls,\n aggregation.frequencyThreshold, aggregation.thresholdNote,\n verbose,\n );\n}\n\nfunction buildSignalsSection(\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n totalQueries: number,\n): string {\n const coverageCount = searches.filter((search) => search.results.length >= 3).length;\n const lowYield = searches\n .filter((search) => search.results.length <= 1)\n .map((search) => `\"${search.query}\"`);\n const consensusCount = aggregation.rankedUrls.filter((url) => url.isConsensus).length;\n\n const lines = [\n '**Signals**',\n `- Coverage: ${coverageCount}/${totalQueries} queries returned \u22653 results`,\n `- Consensus URLs: ${consensusCount}`,\n ];\n\n if (lowYield.length > 0) {\n lines.push(`- Low-yield: ${lowYield.join(', ')}`);\n }\n\n return lines.join('\\n');\n}\n\nexport function buildSuggestedFollowUpsSection(\n refineQueries: Array<{ query: string; rationale?: string; gap_id?: number; gap_description?: string }> | undefined,\n): string {\n if (!refineQueries || refineQueries.length === 0) {\n return '';\n }\n\n const lines = ['## Suggested follow-up searches', ''];\n\n for (const item of refineQueries) {\n const query = sanitizeSuggestion(item.query ?? '');\n if (!query) continue;\n const rationale = sanitizeSuggestion(item.rationale ?? '');\n const gapTag = typeof item.gap_id === 'number'\n ? ` _(closes gap [${item.gap_id}])_`\n : item.gap_description\n ? ` _(${sanitizeSuggestion(item.gap_description)})_`\n : '';\n lines.push(rationale\n ? `- ${query} \u2014 ${rationale}${gapTag}`\n : `- ${query}${gapTag}`,\n );\n }\n\n return lines.length === 2 ? '' : lines.join('\\n');\n}\n\nexport function appendSignalsAndFollowUps(\n markdown: string,\n signalsSection: string,\n refineQueries: RefineQuerySuggestion[] | undefined,\n options: { includeSignals?: boolean } = {},\n): string {\n const includeSignals = options.includeSignals ?? 
false;\n const sections = [markdown];\n if (includeSignals && signalsSection) {\n sections.push('', '---', signalsSection);\n }\n const followUps = buildSuggestedFollowUpsSection(refineQueries);\n if (followUps) {\n sections.push('', followUps);\n }\n return sections.join('\\n');\n}\n\n// --- Classified output (3-tier LLM-classified table) ---\n\nfunction buildClassifiedOutput(\n classification: ClassificationResult,\n aggregation: SearchAggregation,\n extract: string,\n searches: SearchResponse['searches'],\n totalQueries: number,\n verbose: boolean = false,\n): string {\n const rankedUrls = aggregation.rankedUrls;\n\n // Build tier \u2192 entries mapping (keep url data alongside classifier metadata)\n const entryByRank = new Map(classification.results.map((r) => [r.rank, r]));\n\n const tiers = {\n high: [] as typeof rankedUrls,\n maybe: [] as typeof rankedUrls,\n other: [] as typeof rankedUrls,\n };\n\n for (const url of rankedUrls) {\n const entry = entryByRank.get(url.rank);\n const tier = entry?.tier;\n if (tier === 'HIGHLY_RELEVANT') {\n tiers.high.push(url);\n } else if (tier === 'MAYBE_RELEVANT') {\n tiers.maybe.push(url);\n } else {\n tiers.other.push(url);\n }\n }\n\n const lines: string[] = [];\n\n // Header with generated title, synthesis, and confidence\n lines.push(`## ${classification.title}`);\n lines.push(`> Looking for: ${extract}`);\n lines.push(`> ${totalQueries} queries \u2192 ${rankedUrls.length} URLs \u2192 ${tiers.high.length} highly relevant, ${tiers.maybe.length} possibly relevant`);\n if (classification.confidence) {\n const confReason = classification.confidence_reason ? ` \u2014 ${classification.confidence_reason}` : '';\n lines.push(`> Confidence: \\`${classification.confidence}\\`${confReason}`);\n }\n lines.push('');\n lines.push(`**Summary:** ${classification.synthesis}`);\n lines.push('');\n\n // Helper: render one row with optional source_type + reason\n const renderRichRow = (url: typeof rankedUrls[number]): string => {\n const entry = entryByRank.get(url.rank);\n const coveragePct = Math.round(url.coverageRatio * 100);\n const seenIn = `${url.frequency}/${totalQueries} (${coveragePct}%)`;\n const sourceType = entry?.source_type ? `\\`${entry.source_type}\\`` : '\u2014';\n const reason = entry?.reason ? entry.reason.replace(/\\|/g, '\\\\|') : '\u2014';\n return `| ${url.rank} | [${url.title}](${url.url}) | ${sourceType} | ${seenIn} | ${reason} |`;\n };\n\n // Highly Relevant tier\n if (tiers.high.length > 0) {\n lines.push(`### Highly Relevant (${tiers.high.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.high) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Maybe Relevant tier\n if (tiers.maybe.length > 0) {\n lines.push(`### Maybe Relevant (${tiers.maybe.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.maybe) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Other tier \u2014 with query attribution\n if (tiers.other.length > 0) {\n lines.push(`### Other Results (${tiers.other.length})`);\n lines.push('| # | URL | Source | Score | Queries |');\n lines.push('|---|-----|--------|-------|---------|');\n for (const url of tiers.other) {\n const entry = entryByRank.get(url.rank);\n const queryList = url.queries.map((q) => `\"${q}\"`).join(', ');\n const sourceType = entry?.source_type ? 
`\\`${entry.source_type}\\`` : '\u2014';\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n lines.push(`| ${url.rank} | ${domain} | ${sourceType} | ${url.score.toFixed(1)} | ${queryList} |`);\n }\n lines.push('');\n }\n\n // Signals block is gated behind verbose \u2014 it duplicates info already\n // present in the per-row metadata for callers who care.\n // See: docs/code-review/context/05-output-formatting-patterns.md.\n if (verbose) {\n lines.push(buildSignalsSection(aggregation, searches, totalQueries));\n }\n\n // Gaps section \u2014 what the current results don't answer\n if (classification.gaps && classification.gaps.length > 0) {\n lines.push('');\n lines.push('## Gaps');\n for (const gap of classification.gaps) {\n lines.push(`- **[${gap.id}]** ${gap.description}`);\n }\n }\n\n const followUps = buildSuggestedFollowUpsSection(classification.refine_queries);\n if (followUps) {\n lines.push('');\n lines.push(followUps);\n }\n\n return lines.join('\\n');\n}\n\n// --- Metadata builder ---\n\nfunction buildMetadata(\n aggregation: SearchAggregation,\n executionTime: number,\n totalQueries: number,\n searches: SearchResponse['searches'],\n llmClassified: boolean,\n scope: 'web' | 'reddit' | 'both',\n llmError?: string,\n) {\n const coverageSummary = searches.map(s => {\n let topDomain: string | undefined;\n const topResult = s.results[0];\n if (topResult) {\n try { topDomain = new URL(topResult.link).hostname.replace(/^www\\./, ''); } catch { /* ignore */ }\n }\n return { query: s.query, result_count: s.results.length, top_url: topDomain };\n });\n const lowYieldQueries = searches\n .filter(s => s.results.length <= 1)\n .map(s => s.query);\n\n return {\n total_items: totalQueries,\n successful: aggregation.rankedUrls.length,\n failed: totalQueries - searches.filter(s => s.results.length > 0).length,\n execution_time_ms: executionTime,\n llm_classified: llmClassified,\n scope,\n ...(llmError ? { llm_error: llmError } : {}),\n coverage_summary: coverageSummary,\n ...(lowYieldQueries.length > 0 ? { low_yield_queries: lowYieldQueries } : {}),\n };\n}\n\nfunction buildStructuredResults(\n aggregation: SearchAggregation,\n llmTagsByRank?: Map<number, string>,\n): Array<{\n rank: number;\n url: string;\n title: string;\n snippet: string;\n source_type: 'reddit' | 'github' | 'docs' | 'blog' | 'paper' | 'qa' | 'cve' | 'news' | 'video' | 'web';\n score: number;\n seen_in: number;\n best_position: number;\n}> {\n return aggregation.rankedUrls.map((row) => {\n // LLM tag wins when present; heuristic is the always-on fallback. See:\n // mcp-revisions/output-shaping/06-source-type-tagging-without-llm.md.\n const llmTag = llmTagsByRank?.get(row.rank);\n const heuristic = classifySourceByUrl(row.url);\n return {\n rank: row.rank,\n url: row.url,\n title: row.title,\n snippet: row.snippet,\n source_type: ((llmTag as typeof heuristic) ?? 
heuristic),\n score: Number(row.score.toFixed(2)),\n seen_in: row.frequency,\n best_position: row.bestPosition,\n };\n });\n}\n\n// --- Error builder ---\n\nfunction buildWebSearchError(\n error: unknown,\n params: WebSearchParams,\n startTime: number,\n): ToolExecutionResult<WebSearchOutput> {\n const structuredError = classifyError(error);\n const executionTime = Date.now() - startTime;\n\n mcpLog('error', `web-search: ${structuredError.message}`, 'search');\n\n const errorContent = formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'web-search',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n alternatives: [\n 'web-search(queries=[\"topic recommendations\"], extract=\"...\", scope: \"reddit\") \u2014 Reddit-only post permalinks via the same backend',\n 'scrape-links(urls=[...], extract=\"...\") \u2014 if you have URLs from prior steps, scrape them now',\n ],\n });\n\n return toolFailure(\n `${errorContent}\\n\\nExecution time: ${formatDuration(executionTime)}\\nQueries: ${params.queries.length}`,\n );\n}\n\n// --- Main handler ---\n\nexport async function handleWebSearch(\n params: WebSearchParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<WebSearchOutput>> {\n const startTime = Date.now();\n\n try {\n const effectiveQueries = decorateQueriesForScope(params.queries, params.scope);\n if (params.scope !== 'web') {\n mcpLog('info', `Searching scope=${params.scope}: ${params.queries.length} input queries \u2192 ${effectiveQueries.length} dispatched`, 'search');\n } else {\n mcpLog('info', `Searching for ${params.queries.length} query/queries`, 'search');\n }\n await reporter.log('info', `Searching for ${effectiveQueries.length} query/queries (scope=${params.scope})`);\n await reporter.progress(15, 100, 'Submitting search queries');\n\n const rawResponse = await executeSearches(effectiveQueries);\n const response = filterScopedSearches(rawResponse, params.scope);\n await reporter.progress(50, 100, 'Collected search results');\n\n const { aggregation } = processResults(response);\n await reporter.log(\n 'info',\n `Collected ${aggregation.totalUniqueUrls} unique URLs across ${response.totalQueries} queries`,\n );\n\n // Decide: raw output or LLM classification\n const useRaw = params.raw;\n const llmProcessor = createLLMProcessor();\n\n let markdown: string;\n let llmClassified = false;\n let llmError: string | undefined;\n\n if (useRaw || !llmProcessor) {\n // Raw path: traditional unified ranked list\n if (!useRaw && !llmProcessor) {\n llmError = 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). 
Falling back to raw output.';\n mcpLog('warning', llmError, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', 'llm_classifier_unreachable: planner not configured; raw ranked list returned');\n }\n let rawRefineQueries: RefineQuerySuggestion[] | undefined;\n if (useRaw && llmProcessor) {\n const refineResult = await suggestRefineQueriesForRawMode(\n aggregation.rankedUrls,\n params.extract,\n params.queries,\n llmProcessor,\n );\n rawRefineQueries = refineResult.result;\n }\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n rawRefineQueries,\n { includeSignals: params.verbose },\n );\n await reporter.progress(80, 100, 'Ranking search results');\n } else {\n // LLM classification path\n await reporter.progress(65, 100, 'Classifying results by relevance');\n const classification = await classifySearchResults(\n aggregation.rankedUrls,\n params.extract,\n response.totalQueries,\n llmProcessor,\n params.queries,\n );\n\n if (classification.result) {\n markdown = buildClassifiedOutput(\n classification.result, aggregation, params.extract, response.searches, response.totalQueries, params.verbose,\n );\n llmClassified = true;\n await reporter.progress(85, 100, 'Formatted classified results');\n } else {\n // Classification failed \u2014 fall back to raw\n llmError = classification.error ?? 'Unknown classification error';\n mcpLog('warning', `Classification failed, falling back to raw: ${llmError}`, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', `llm_classifier_unreachable: ${llmError}`);\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n undefined,\n { includeSignals: params.verbose },\n );\n await reporter.progress(85, 100, 'Classification failed, using raw output');\n }\n }\n\n const executionTime = Date.now() - startTime;\n const metadata = buildMetadata(\n aggregation, executionTime, response.totalQueries, response.searches, llmClassified, params.scope, llmError,\n );\n\n // Build per-row structured results so capability-aware clients can\n // index into `structuredContent.results` rather than regex-scrape the\n // markdown table. The LLM tag wins when present; heuristic is the\n // always-on fallback.\n const llmTagsByRank = new Map<number, string>();\n // (When classification succeeds the source_type per-row is populated\n // inside buildClassifiedOutput via the entry.source_type field \u2014 but\n // we don't have a direct handle on it here without a refactor. The\n // heuristic alone covers the structuredContent shape correctly; the\n // LLM-tagged variant remains in the markdown body.)\n const results = buildStructuredResults(aggregation, llmTagsByRank);\n\n mcpLog('info', `Search completed: ${aggregation.rankedUrls.length} URLs, classified=${llmClassified}`, 'search');\n await reporter.log('info', `Search completed with ${aggregation.rankedUrls.length} URLs (classified: ${llmClassified})`);\n\n const footer = `\\n---\\n*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs${llmClassified ? 
' | LLM classified' : ''}*`;\n const fullMarkdown = markdown + footer;\n\n return toolSuccess(fullMarkdown, { content: fullMarkdown, results, metadata });\n } catch (error) {\n return buildWebSearchError(error, params, startTime);\n }\n}\n\nexport function registerWebSearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'web-search',\n title: 'Web Search',\n description:\n 'Fan out Google queries in parallel. One call carries up to 50 queries in a flat `queries` array \u2014 pack diverse facets (not paraphrases) into a single call. Call me AGGRESSIVELY across a session: 2\u20134 rounds is normal, 1 is underuse. After each pass, read `gaps[]` + `refine_queries[]` and fire another round with the new terms. Safe to call multiple times in parallel in the same turn for orthogonal subtopics. `scope`: `\"reddit\"` (server appends `site:reddit.com` + filters to post permalinks \u2014 use for sentiment / migration / lived experience), `\"web\"` default (spec / bug / pricing / CVE / API), `\"both\"` (fan each query across both \u2014 use when opinion-heavy AND needs official sources). Returns a tiered Markdown report (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis with `[rank]` citations + `## Gaps` + `## Suggested follow-up searches` tied to gap ids. Set `raw=true` to skip classification.',\n schema: webSearchParamsSchema,\n outputSchema: webSearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const reporter = createToolReporter(ctx, 'web-search');\n const result = await handleWebSearch(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Search failed' : 'Search complete');\n return toToolResponse(result);\n },\n );\n}\n"],
5
- "mappings": "AAOA,SAAS,iBAAiB,4BAA4B;AACtD;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,oBAAoB;AAC7B;AAAA,EACE;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AACP,SAAS,qBAAqB;AAC9B,SAAS,2BAA2B;AACpC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AACP,SAAS,0BAA0B;AAqBnC,MAAM,wBAAwB;AAC9B,MAAM,cAAc;AAEpB,SAAS,wBAAwB,SAAmB,OAA4C;AAC9F,MAAI,UAAU,MAAO,QAAO;AAC5B,QAAM,WAAW,QAAQ;AAAA,IAAI,CAAC,MAC5B,wBAAwB,KAAK,CAAC,IAAI,IAAI,GAAG,CAAC;AAAA,EAC5C;AACA,SAAO,UAAU,WAAW,WAAW,CAAC,GAAG,SAAS,GAAG,QAAQ;AACjE;AAEA,eAAe,gBAAgB,SAA4C;AACzE,QAAM,SAAS,IAAI,aAAa;AAChC,SAAO,OAAO,eAAe,OAAO;AACtC;AAEA,SAAS,qBACP,UACA,OACgB;AAChB,MAAI,UAAU,MAAO,QAAO;AAC5B,QAAM,WAAW,SAAS,SAAS,IAAI,CAAC,YAAY;AAAA,IAClD,GAAG;AAAA,IACH,SAAS,OAAO,QAAQ,OAAO,CAAC,MAAM;AACpC,UAAI;AACJ,UAAI;AAAE,eAAO,IAAI,IAAI,EAAE,IAAI,EAAE;AAAA,MAAU,QAAQ;AAAE,eAAO;AAAA,MAAM;AAE9D,UAAI,CAAC,YAAY,KAAK,IAAI,EAAG,QAAO,UAAU;AAC9C,aAAO,sBAAsB,KAAK,EAAE,IAAI;AAAA,IAC1C,CAAC;AAAA,EACH,EAAE;AACF,SAAO,EAAE,GAAG,UAAU,UAAU,SAAS;AAC3C;AAEA,SAAS,eAAe,UAEtB;AACA,QAAM,cAAc,iBAAiB,SAAS,UAAU,CAAC;AACzD,SAAO,EAAE,YAAY;AACvB;AAIA,SAAS,eACP,SACA,aACA,UACA,UAAmB,OACX;AACR,SAAO;AAAA,IACL,YAAY;AAAA,IAAY;AAAA,IAAS;AAAA,IACjC,YAAY;AAAA,IACZ,YAAY;AAAA,IAAoB,YAAY;AAAA,IAC5C;AAAA,EACF;AACF;AAEA,SAAS,oBACP,aACA,UACA,cACQ;AACR,QAAM,gBAAgB,SAAS,OAAO,CAAC,WAAW,OAAO,QAAQ,UAAU,CAAC,EAAE;AAC9E,QAAM,WAAW,SACd,OAAO,CAAC,WAAW,OAAO,QAAQ,UAAU,CAAC,EAC7C,IAAI,CAAC,WAAW,IAAI,OAAO,KAAK,GAAG;AACtC,QAAM,iBAAiB,YAAY,WAAW,OAAO,CAAC,QAAQ,IAAI,WAAW,EAAE;AAE/E,QAAM,QAAQ;AAAA,IACZ;AAAA,IACA,eAAe,aAAa,IAAI,YAAY;AAAA,IAC5C,qBAAqB,cAAc;AAAA,EACrC;AAEA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,gBAAgB,SAAS,KAAK,IAAI,CAAC,EAAE;AAAA,EAClD;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEO,SAAS,+BACd,eACQ;AACR,MAAI,CAAC,iBAAiB,cAAc,WAAW,GAAG;AAChD,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,CAAC,mCAAmC,EAAE;AAEpD,aAAW,QAAQ,eAAe;AAChC,UAAM,QAAQ,mBAAmB,KAAK,SAAS,EAAE;AACjD,QAAI,CAAC,MAAO;AACZ,UAAM,YAAY,mBAAmB,KAAK,aAAa,EAAE;AACzD,UAAM,SAAS,OAAO,KAAK,WAAW,WAClC,kBAAkB,KAAK,MAAM,QAC7B,KAAK,kBACH,MAAM,mBAAmB,KAAK,eAAe,CAAC,OAC9C;AACN,UAAM;AAAA,MAAK,YACP,KAAK,KAAK,WAAM,SAAS,GAAG,MAAM,KAClC,KAAK,KAAK,GAAG,MAAM;AAAA,IACvB;AAAA,EACF;AAEA,SAAO,MAAM,WAAW,IAAI,KAAK,MAAM,KAAK,IAAI;AAClD;AAEO,SAAS,0BACd,UACA,gBACA,eACA,UAAwC,CAAC,GACjC;AACR,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,WAAW,CAAC,QAAQ;AAC1B,MAAI,kBAAkB,gBAAgB;AACpC,aAAS,KAAK,IAAI,OAAO,cAAc;AAAA,EACzC;AACA,QAAM,YAAY,+BAA+B,aAAa;AAC9D,MAAI,WAAW;AACb,aAAS,KAAK,IAAI,SAAS;AAAA,EAC7B;AACA,SAAO,SAAS,KAAK,IAAI;AAC3B;AAIA,SAAS,sBACP,gBACA,aACA,SACA,UACA,cACA,UAAmB,OACX;AACR,QAAM,aAAa,YAAY;AAG/B,QAAM,cAAc,IAAI,IAAI,eAAe,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;AAE1E,QAAM,QAAQ;AAAA,IACZ,MAAM,CAAC;AAAA,IACP,OAAO,CAAC;AAAA,IACR,OAAO,CAAC;AAAA,EACV;AAEA,aAAW,OAAO,YAAY;AAC5B,UAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,UAAM,OAAO,OAAO;AACpB,QAAI,SAAS,mBAAmB;AAC9B,YAAM,KAAK,KAAK,GAAG;AAAA,IACrB,WAAW,SAAS,kBAAkB;AACpC,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB,OAAO;AACL,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB;AAAA,EACF;AAEA,QAAM,QAAkB,CAAC;AAGzB,QAAM,KAAK,MAAM,eAAe,KAAK,EAAE;AACvC,QAAM,KAAK,kBAAkB,OAAO,EAAE;AACtC,QAAM,KAAK,KAAK,YAAY,mBAAc,WAAW,MAAM,gBAAW,MAAM,KAAK,MAAM,qBAAqB,MAAM,MAAM,MAAM,oBAAoB;AAClJ,MAAI,eAAe,YAAY;AAC7B,UAAM,aAAa,eAAe,oBAAoB,WAAM,eAAe,iBAAiB,KAAK;AACjG,UAAM,KAAK,mBAAmB,eAAe,UAAU,KAAK,UAAU,EAAE;AAAA,EAC1E;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,gBAAgB,eAAe,SAAS,EAAE;AACrD,QAAM,KAAK,EAAE;AAGb,QAAM,gBAAgB,CAAC,QAA2C;AAChE,UAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,UAAM,cAAc,KAAK,MAAM,IAAI,gBAAgB,GAAG;AACtD,UAAM,SAAS,GAAG,IAAI,SAAS,IAAI,YAAY,KAAK,WAAW;AAC/D,UAAM,aAAa,
OAAO,cAAc,KAAK,MAAM,WAAW,OAAO;AACrE,UAAM,SAAS,OAAO,SAAS,MAAM,OAAO,QAAQ,OAAO,KAAK,IAAI;AACpE,WAAO,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,KAAK,IAAI,GAAG,OAAO,UAAU,MAAM,MAAM,MAAM,MAAM;AAAA,EAC3F;AAGA,MAAI,MAAM,KAAK,SAAS,GAAG;AACzB,UAAM,KAAK,wBAAwB,MAAM,KAAK,MAAM,GAAG;AACvD,UAAM,KAAK,sCAAsC;AACjD,UAAM,KAAK,sCAAsC;AACjD,eAAW,OAAO,MAAM,KAAM,OAAM,KAAK,cAAc,GAAG,CAAC;AAC3D,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,uBAAuB,MAAM,MAAM,MAAM,GAAG;AACvD,UAAM,KAAK,sCAAsC;AACjD,UAAM,KAAK,sCAAsC;AACjD,eAAW,OAAO,MAAM,MAAO,OAAM,KAAK,cAAc,GAAG,CAAC;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,sBAAsB,MAAM,MAAM,MAAM,GAAG;AACtD,UAAM,KAAK,wCAAwC;AACnD,UAAM,KAAK,wCAAwC;AACnD,eAAW,OAAO,MAAM,OAAO;AAC7B,YAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,YAAM,YAAY,IAAI,QAAQ,IAAI,CAAC,MAAM,IAAI,CAAC,GAAG,EAAE,KAAK,IAAI;AAC5D,YAAM,aAAa,OAAO,cAAc,KAAK,MAAM,WAAW,OAAO;AACrE,UAAI;AACJ,UAAI;AACF,iBAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MACzD,QAAQ;AACN,iBAAS,IAAI;AAAA,MACf;AACA,YAAM,KAAK,KAAK,IAAI,IAAI,MAAM,MAAM,MAAM,UAAU,MAAM,IAAI,MAAM,QAAQ,CAAC,CAAC,MAAM,SAAS,IAAI;AAAA,IACnG;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAKA,MAAI,SAAS;AACX,UAAM,KAAK,oBAAoB,aAAa,UAAU,YAAY,CAAC;AAAA,EACrE;AAGA,MAAI,eAAe,QAAQ,eAAe,KAAK,SAAS,GAAG;AACzD,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,SAAS;AACpB,eAAW,OAAO,eAAe,MAAM;AACrC,YAAM,KAAK,QAAQ,IAAI,EAAE,OAAO,IAAI,WAAW,EAAE;AAAA,IACnD;AAAA,EACF;AAEA,QAAM,YAAY,+BAA+B,eAAe,cAAc;AAC9E,MAAI,WAAW;AACb,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,SAAS;AAAA,EACtB;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAIA,SAAS,cACP,aACA,eACA,cACA,UACA,eACA,OACA,UACA;AACA,QAAM,kBAAkB,SAAS,IAAI,OAAK;AACxC,QAAI;AACJ,UAAM,YAAY,EAAE,QAAQ,CAAC;AAC7B,QAAI,WAAW;AACb,UAAI;AAAE,oBAAY,IAAI,IAAI,UAAU,IAAI,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IACnG;AACA,WAAO,EAAE,OAAO,EAAE,OAAO,cAAc,EAAE,QAAQ,QAAQ,SAAS,UAAU;AAAA,EAC9E,CAAC;AACD,QAAM,kBAAkB,SACrB,OAAO,OAAK,EAAE,QAAQ,UAAU,CAAC,EACjC,IAAI,OAAK,EAAE,KAAK;AAEnB,SAAO;AAAA,IACL,aAAa;AAAA,IACb,YAAY,YAAY,WAAW;AAAA,IACnC,QAAQ,eAAe,SAAS,OAAO,OAAK,EAAE,QAAQ,SAAS,CAAC,EAAE;AAAA,IAClE,mBAAmB;AAAA,IACnB,gBAAgB;AAAA,IAChB;AAAA,IACA,GAAI,WAAW,EAAE,WAAW,SAAS,IAAI,CAAC;AAAA,IAC1C,kBAAkB;AAAA,IAClB,GAAI,gBAAgB,SAAS,IAAI,EAAE,mBAAmB,gBAAgB,IAAI,CAAC;AAAA,EAC7E;AACF;AAEA,SAAS,uBACP,aACA,eAUC;AACD,SAAO,YAAY,WAAW,IAAI,CAAC,QAAQ;AAGzC,UAAM,SAAS,eAAe,IAAI,IAAI,IAAI;AAC1C,UAAM,YAAY,oBAAoB,IAAI,GAAG;AAC7C,WAAO;AAAA,MACL,MAAM,IAAI;AAAA,MACV,KAAK,IAAI;AAAA,MACT,OAAO,IAAI;AAAA,MACX,SAAS,IAAI;AAAA,MACb,aAAe,UAA+B;AAAA,MAC9C,OAAO,OAAO,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,MAClC,SAAS,IAAI;AAAA,MACb,eAAe,IAAI;AAAA,IACrB;AAAA,EACF,CAAC;AACH;AAIA,SAAS,oBACP,OACA,QACA,WACsC;AACtC,QAAM,kBAAkB,cAAc,KAAK;AAC3C,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC,SAAO,SAAS,eAAe,gBAAgB,OAAO,IAAI,QAAQ;AAElE,QAAM,eAAe,YAAY;AAAA,IAC/B,MAAM,gBAAgB;AAAA,IACtB,SAAS,gBAAgB;AAAA,IACzB,WAAW,gBAAgB;AAAA,IAC3B,UAAU;AAAA,IACV,UAAU,CAAC,wCAAwC;AAAA,IACnD,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL,GAAG,YAAY;AAAA;AAAA,kBAAuB,eAAe,aAAa,CAAC;AAAA,WAAc,OAAO,QAAQ,MAAM;AAAA,EACxG;AACF;AAIA,eAAsB,gBACpB,QACA,WAAyB,eACsB;AAC/C,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI;AACF,UAAM,mBAAmB,wBAAwB,OAAO,SAAS,OAAO,KAAK;AAC7E,QAAI,OAAO,UAAU,OAAO;AAC1B,aAAO,QAAQ,mBAAmB,OAAO,KAAK,KAAK,OAAO,QAAQ,MAAM,yBAAoB,iBAAiB,MAAM,eAAe,QAAQ;AAAA,IAC5I,OAAO;AACL,aAAO,QAAQ,iBAAiB,OAAO,QAAQ,MAAM,kBAAkB,QAAQ;AAAA,IACjF;AACA,UAAM,SAAS,IAAI,QAAQ,iBAAiB,iBAAiB,MAAM,yBAAyB,OAAO,KAAK,GAAG;AAC3G,UAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAE5D,UAAM,cAAc,MAAM,gBAAgB,gBAAgB;AAC1D,UAAM,WAAW,qBAAqB,aAAa,OAAO,KAAK;AAC/D,UAAM,SAAS,SAAS,IAAI,KAAK,0BAA0B;AAE3D,UAAM,EAAE,YAAY,IAAI,eAAe,QAAQ;
AAC/C,UAAM,SAAS;AAAA,MACb;AAAA,MACA,aAAa,YAAY,eAAe,uBAAuB,SAAS,YAAY;AAAA,IACtF;AAGA,UAAM,SAAS,OAAO;AACtB,UAAM,eAAe,mBAAmB;AAExC,QAAI;AACJ,QAAI,gBAAgB;AACpB,QAAI;AAEJ,QAAI,UAAU,CAAC,cAAc;AAE3B,UAAI,CAAC,UAAU,CAAC,cAAc;AAC5B,mBAAW;AACX,eAAO,WAAW,UAAU,QAAQ;AAEpC,cAAM,SAAS,IAAI,WAAW,8EAA8E;AAAA,MAC9G;AACA,UAAI;AACJ,UAAI,UAAU,cAAc;AAC1B,cAAM,eAAe,MAAM;AAAA,UACzB,YAAY;AAAA,UACZ,OAAO;AAAA,UACP,OAAO;AAAA,UACP;AAAA,QACF;AACA,2BAAmB,aAAa;AAAA,MAClC;AACA,iBAAW;AAAA,QACT,eAAe,OAAO,SAAS,aAAa,SAAS,UAAU,OAAO,OAAO;AAAA,QAC7E,oBAAoB,aAAa,SAAS,UAAU,SAAS,YAAY;AAAA,QACzE;AAAA,QACA,EAAE,gBAAgB,OAAO,QAAQ;AAAA,MACnC;AACA,YAAM,SAAS,SAAS,IAAI,KAAK,wBAAwB;AAAA,IAC3D,OAAO;AAEL,YAAM,SAAS,SAAS,IAAI,KAAK,kCAAkC;AACnE,YAAM,iBAAiB,MAAM;AAAA,QAC3B,YAAY;AAAA,QACZ,OAAO;AAAA,QACP,SAAS;AAAA,QACT;AAAA,QACA,OAAO;AAAA,MACT;AAEA,UAAI,eAAe,QAAQ;AACzB,mBAAW;AAAA,UACT,eAAe;AAAA,UAAQ;AAAA,UAAa,OAAO;AAAA,UAAS,SAAS;AAAA,UAAU,SAAS;AAAA,UAAc,OAAO;AAAA,QACvG;AACA,wBAAgB;AAChB,cAAM,SAAS,SAAS,IAAI,KAAK,8BAA8B;AAAA,MACjE,OAAO;AAEL,mBAAW,eAAe,SAAS;AACnC,eAAO,WAAW,+CAA+C,QAAQ,IAAI,QAAQ;AAErF,cAAM,SAAS,IAAI,WAAW,+BAA+B,QAAQ,EAAE;AACvE,mBAAW;AAAA,UACT,eAAe,OAAO,SAAS,aAAa,SAAS,UAAU,OAAO,OAAO;AAAA,UAC7E,oBAAoB,aAAa,SAAS,UAAU,SAAS,YAAY;AAAA,UACzE;AAAA,UACA,EAAE,gBAAgB,OAAO,QAAQ;AAAA,QACnC;AACA,cAAM,SAAS,SAAS,IAAI,KAAK,yCAAyC;AAAA,MAC5E;AAAA,IACF;AAEA,UAAM,gBAAgB,KAAK,IAAI,IAAI;AACnC,UAAM,WAAW;AAAA,MACf;AAAA,MAAa;AAAA,MAAe,SAAS;AAAA,MAAc,SAAS;AAAA,MAAU;AAAA,MAAe,OAAO;AAAA,MAAO;AAAA,IACrG;AAMA,UAAM,gBAAgB,oBAAI,IAAoB;AAM9C,UAAM,UAAU,uBAAuB,aAAa,aAAa;AAEjE,WAAO,QAAQ,qBAAqB,YAAY,WAAW,MAAM,qBAAqB,aAAa,IAAI,QAAQ;AAC/G,UAAM,SAAS,IAAI,QAAQ,yBAAyB,YAAY,WAAW,MAAM,sBAAsB,aAAa,GAAG;AAEvH,UAAM,SAAS;AAAA;AAAA,GAAW,eAAe,aAAa,CAAC,MAAM,YAAY,eAAe,eAAe,gBAAgB,sBAAsB,EAAE;AAC/I,UAAM,eAAe,WAAW;AAEhC,WAAO,YAAY,cAAc,EAAE,SAAS,cAAc,SAAS,SAAS,CAAC;AAAA,EAC/E,SAAS,OAAO;AACd,WAAO,oBAAoB,OAAO,QAAQ,SAAS;AAAA,EACrD;AACF;AAEO,SAAS,sBAAsB,QAAyB;AAC7D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,QAAQ;AAC7B,eAAO,eAAe,YAAY,qBAAqB,QAAQ,CAAC,CAAC;AAAA,MACnE;AAEA,YAAM,WAAW,mBAAmB,KAAK,YAAY;AACrD,YAAM,SAAS,MAAM,gBAAgB,MAAM,QAAQ;AAEnD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
4
+ "sourcesContent": ["/**\n * Web Search Tool Handler\n * NEVER throws - always returns structured response for graceful degradation\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport { getCapabilities, getMissingEnvMessage } from '../config/index.js';\nimport {\n webSearchOutputSchema,\n webSearchParamsSchema,\n type WebSearchParams,\n type WebSearchOutput,\n} from '../schemas/web-search.js';\nimport { SearchClient } from '../clients/search.js';\nimport {\n aggregateAndRank,\n generateUnifiedOutput,\n} from '../utils/url-aggregator.js';\nimport {\n createLLMProcessor,\n classifySearchResults,\n suggestRefineQueriesForRawMode,\n type ClassificationEntry,\n type ClassificationResult,\n type RefineQuerySuggestion,\n} from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { classifySourceByUrl } from '../utils/source-type.js';\nimport {\n mcpLog,\n formatError,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\nimport { sanitizeSuggestion } from '../utils/sanitize.js';\n\n// --- Internal types ---\n\ninterface SearchAggregation {\n readonly rankedUrls: ReturnType<typeof aggregateAndRank>['rankedUrls'];\n readonly totalUniqueUrls: number;\n readonly frequencyThreshold: number;\n readonly thresholdNote?: string;\n}\n\ninterface SearchResponse {\n searches: Parameters<typeof aggregateAndRank>[0];\n totalQueries: number;\n}\n\n// --- Helpers ---\n\n/** Reddit post permalink: /r/{sub}/comments/{id}/ \u2014 drops subreddit\n * homepages, /rising, /new, /top, etc. so only post URLs reach the agent.\n * See mcp-revisions/tool-surface/02-extend-web-search-with-reddit-scope.md. */\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+\\//i;\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\n\nfunction decorateQueriesForScope(queries: string[], scope: 'web' | 'reddit' | 'both'): string[] {\n if (scope === 'web') return queries;\n const reddited = queries.map((q) =>\n /\\bsite:reddit\\.com\\b/i.test(q) ? q : `${q} site:reddit.com`,\n );\n return scope === 'reddit' ? 
reddited : [...queries, ...reddited];\n}\n\nasync function executeSearches(queries: string[]): Promise<SearchResponse> {\n const client = new SearchClient();\n return client.searchMultiple(queries);\n}\n\nfunction filterScopedSearches(\n response: SearchResponse,\n scope: 'web' | 'reddit' | 'both',\n): SearchResponse {\n if (scope === 'web') return response;\n const filtered = response.searches.map((search) => ({\n ...search,\n results: search.results.filter((r) => {\n let host: string;\n try { host = new URL(r.link).hostname; } catch { return true; }\n // Non-reddit URLs pass through; reddit URLs must be post permalinks.\n if (!REDDIT_HOST.test(host)) return scope !== 'reddit';\n return REDDIT_POST_PERMALINK.test(r.link);\n }),\n }));\n return { ...response, searches: filtered };\n}\n\nfunction processResults(response: SearchResponse): {\n aggregation: SearchAggregation;\n} {\n const aggregation = aggregateAndRank(response.searches, 5);\n return { aggregation };\n}\n\n// --- Raw output (traditional unified ranked list) ---\n\nfunction buildRawOutput(\n queries: string[],\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n verbose: boolean = false,\n): string {\n return generateUnifiedOutput(\n aggregation.rankedUrls, queries, searches,\n aggregation.totalUniqueUrls,\n aggregation.frequencyThreshold, aggregation.thresholdNote,\n verbose,\n );\n}\n\nfunction buildSignalsSection(\n aggregation: SearchAggregation,\n searches: SearchResponse['searches'],\n totalQueries: number,\n): string {\n const coverageCount = searches.filter((search) => search.results.length >= 3).length;\n const lowYield = searches\n .filter((search) => search.results.length <= 1)\n .map((search) => `\"${search.query}\"`);\n const consensusCount = aggregation.rankedUrls.filter((url) => url.isConsensus).length;\n\n const lines = [\n '**Signals**',\n `- Coverage: ${coverageCount}/${totalQueries} queries returned \u22653 results`,\n `- Consensus URLs: ${consensusCount}`,\n ];\n\n if (lowYield.length > 0) {\n lines.push(`- Low-yield: ${lowYield.join(', ')}`);\n }\n\n return lines.join('\\n');\n}\n\nexport function buildSuggestedFollowUpsSection(\n refineQueries: Array<{ query: string; rationale?: string; gap_id?: number; gap_description?: string }> | undefined,\n): string {\n if (!refineQueries || refineQueries.length === 0) {\n return '';\n }\n\n const lines = ['## Suggested follow-up searches', ''];\n\n for (const item of refineQueries) {\n const query = sanitizeSuggestion(item.query ?? '');\n if (!query) continue;\n const rationale = sanitizeSuggestion(item.rationale ?? '');\n const gapTag = typeof item.gap_id === 'number'\n ? ` _(closes gap [${item.gap_id}])_`\n : item.gap_description\n ? ` _(${sanitizeSuggestion(item.gap_description)})_`\n : '';\n lines.push(rationale\n ? `- ${query} \u2014 ${rationale}${gapTag}`\n : `- ${query}${gapTag}`,\n );\n }\n\n return lines.length === 2 ? '' : lines.join('\\n');\n}\n\nexport function appendSignalsAndFollowUps(\n markdown: string,\n signalsSection: string,\n refineQueries: RefineQuerySuggestion[] | undefined,\n options: { includeSignals?: boolean } = {},\n): string {\n const includeSignals = options.includeSignals ?? 
false;\n const sections = [markdown];\n if (includeSignals && signalsSection) {\n sections.push('', '---', signalsSection);\n }\n const followUps = buildSuggestedFollowUpsSection(refineQueries);\n if (followUps) {\n sections.push('', followUps);\n }\n return sections.join('\\n');\n}\n\n// --- \"Start here\" section ---\n//\n// Surfaces the best 3-5 URLs at the top of the classified response so an agent\n// skimming the first screen sees them before tier tables. Deterministic: uses\n// existing `tier` + `rank` + `reason` from the classifier, no extra LLM call.\n//\n// Algorithm: take HIGHLY_RELEVANT by rank up to MAX_START_HERE; if fewer than\n// MIN_START_HERE, pad from top MAYBE_RELEVANT; skip entirely if no entries\n// above OTHER.\n\nconst MIN_START_HERE = 3;\nconst MAX_START_HERE = 5;\n\n/** Minimal structural shape \u2014 avoids coupling to private `RankedUrl` type. */\ninterface StartHereCandidate {\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n}\n\ninterface StartHereTiers {\n readonly high: readonly StartHereCandidate[];\n readonly maybe: readonly StartHereCandidate[];\n}\n\nexport function buildStartHereSection(\n tiers: StartHereTiers,\n entryByRank: Map<number, ClassificationEntry>,\n opts: { min?: number; max?: number } = {},\n): string {\n const min = opts.min ?? MIN_START_HERE;\n const max = opts.max ?? MAX_START_HERE;\n\n const picks: Array<{ candidate: StartHereCandidate; tier: 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' }> = [];\n\n for (const candidate of tiers.high) {\n if (picks.length >= max) break;\n picks.push({ candidate, tier: 'HIGHLY_RELEVANT' });\n }\n\n if (picks.length < min) {\n const target = Math.min(min, max);\n for (const candidate of tiers.maybe) {\n if (picks.length >= target) break;\n picks.push({ candidate, tier: 'MAYBE_RELEVANT' });\n }\n }\n\n if (picks.length === 0) return '';\n\n const lines: string[] = [];\n lines.push('## Start here \u2014 best candidates for your extract');\n picks.forEach((pick, i) => {\n const entry = entryByRank.get(pick.candidate.rank);\n const reason = entry?.reason && entry.reason.trim().length > 0 ? entry.reason : '\u2014';\n let domain: string;\n try {\n domain = new URL(pick.candidate.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = pick.candidate.url;\n }\n lines.push(\n `${i + 1}. 
**[${pick.candidate.title}](${pick.candidate.url})** \u2014 ${domain} \u2014 ${reason} *(${pick.tier}, rank ${pick.candidate.rank})*`,\n );\n });\n return lines.join('\\n');\n}\n\n// --- Classified output (3-tier LLM-classified table) ---\n\nfunction buildClassifiedOutput(\n classification: ClassificationResult,\n aggregation: SearchAggregation,\n extract: string,\n searches: SearchResponse['searches'],\n totalQueries: number,\n verbose: boolean = false,\n): string {\n const rankedUrls = aggregation.rankedUrls;\n\n // Build tier \u2192 entries mapping (keep url data alongside classifier metadata)\n const entryByRank = new Map(classification.results.map((r) => [r.rank, r]));\n\n const tiers = {\n high: [] as typeof rankedUrls,\n maybe: [] as typeof rankedUrls,\n other: [] as typeof rankedUrls,\n };\n\n for (const url of rankedUrls) {\n const entry = entryByRank.get(url.rank);\n const tier = entry?.tier;\n if (tier === 'HIGHLY_RELEVANT') {\n tiers.high.push(url);\n } else if (tier === 'MAYBE_RELEVANT') {\n tiers.maybe.push(url);\n } else {\n tiers.other.push(url);\n }\n }\n\n const lines: string[] = [];\n\n // Header with generated title, synthesis, and confidence\n lines.push(`## ${classification.title}`);\n lines.push(`> Looking for: ${extract}`);\n lines.push(`> ${totalQueries} queries \u2192 ${rankedUrls.length} URLs \u2192 ${tiers.high.length} highly relevant, ${tiers.maybe.length} possibly relevant`);\n if (classification.confidence) {\n const confReason = classification.confidence_reason ? ` \u2014 ${classification.confidence_reason}` : '';\n lines.push(`> Confidence: \\`${classification.confidence}\\`${confReason}`);\n }\n lines.push('');\n\n // \"Start here\" block: surface the top 3-5 URLs above the synthesis so an\n // agent skimming the first screen sees scrape candidates before prose.\n const startHere = buildStartHereSection(\n { high: tiers.high, maybe: tiers.maybe },\n entryByRank,\n );\n if (startHere) {\n lines.push(startHere);\n lines.push('');\n }\n\n lines.push(`**Summary:** ${classification.synthesis}`);\n lines.push('');\n\n // Helper: render one row with optional source_type + reason\n const renderRichRow = (url: typeof rankedUrls[number]): string => {\n const entry = entryByRank.get(url.rank);\n const coveragePct = Math.round(url.coverageRatio * 100);\n const seenIn = `${url.frequency}/${totalQueries} (${coveragePct}%)`;\n const sourceType = entry?.source_type ? `\\`${entry.source_type}\\`` : '\u2014';\n const reason = entry?.reason ? 
entry.reason.replace(/\\|/g, '\\\\|') : '\u2014';\n return `| ${url.rank} | [${url.title}](${url.url}) | ${sourceType} | ${seenIn} | ${reason} |`;\n };\n\n // Highly Relevant tier\n if (tiers.high.length > 0) {\n lines.push(`### Highly Relevant (${tiers.high.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.high) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Maybe Relevant tier\n if (tiers.maybe.length > 0) {\n lines.push(`### Maybe Relevant (${tiers.maybe.length})`);\n lines.push('| # | URL | Source | Seen in | Why |');\n lines.push('|---|-----|--------|---------|-----|');\n for (const url of tiers.maybe) lines.push(renderRichRow(url));\n lines.push('');\n }\n\n // Other tier \u2014 with query attribution\n if (tiers.other.length > 0) {\n lines.push(`### Other Results (${tiers.other.length})`);\n lines.push('| # | URL | Source | Score | Queries |');\n lines.push('|---|-----|--------|-------|---------|');\n for (const url of tiers.other) {\n const entry = entryByRank.get(url.rank);\n const queryList = url.queries.map((q) => `\"${q}\"`).join(', ');\n const sourceType = entry?.source_type ? `\\`${entry.source_type}\\`` : '\u2014';\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n lines.push(`| ${url.rank} | ${domain} | ${sourceType} | ${url.score.toFixed(1)} | ${queryList} |`);\n }\n lines.push('');\n }\n\n // Signals block is gated behind verbose \u2014 it duplicates info already\n // present in the per-row metadata for callers who care.\n // See: docs/code-review/context/05-output-formatting-patterns.md.\n if (verbose) {\n lines.push(buildSignalsSection(aggregation, searches, totalQueries));\n }\n\n // Gaps section \u2014 what the current results don't answer\n if (classification.gaps && classification.gaps.length > 0) {\n lines.push('');\n lines.push('## Gaps');\n for (const gap of classification.gaps) {\n lines.push(`- **[${gap.id}]** ${gap.description}`);\n }\n }\n\n const followUps = buildSuggestedFollowUpsSection(classification.refine_queries);\n if (followUps) {\n lines.push('');\n lines.push(followUps);\n }\n\n return lines.join('\\n');\n}\n\n// --- Metadata builder ---\n\nfunction buildMetadata(\n aggregation: SearchAggregation,\n executionTime: number,\n totalQueries: number,\n searches: SearchResponse['searches'],\n llmClassified: boolean,\n scope: 'web' | 'reddit' | 'both',\n llmError?: string,\n) {\n const coverageSummary = searches.map(s => {\n let topDomain: string | undefined;\n const topResult = s.results[0];\n if (topResult) {\n try { topDomain = new URL(topResult.link).hostname.replace(/^www\\./, ''); } catch { /* ignore */ }\n }\n return { query: s.query, result_count: s.results.length, top_url: topDomain };\n });\n const lowYieldQueries = searches\n .filter(s => s.results.length <= 1)\n .map(s => s.query);\n\n return {\n total_items: totalQueries,\n successful: aggregation.rankedUrls.length,\n failed: totalQueries - searches.filter(s => s.results.length > 0).length,\n execution_time_ms: executionTime,\n llm_classified: llmClassified,\n scope,\n ...(llmError ? { llm_error: llmError } : {}),\n coverage_summary: coverageSummary,\n ...(lowYieldQueries.length > 0 ? 
{ low_yield_queries: lowYieldQueries } : {}),\n };\n}\n\nfunction buildStructuredResults(\n aggregation: SearchAggregation,\n llmTagsByRank?: Map<number, string>,\n): Array<{\n rank: number;\n url: string;\n title: string;\n snippet: string;\n source_type: 'reddit' | 'github' | 'docs' | 'blog' | 'paper' | 'qa' | 'cve' | 'news' | 'video' | 'web';\n score: number;\n seen_in: number;\n best_position: number;\n}> {\n return aggregation.rankedUrls.map((row) => {\n // LLM tag wins when present; heuristic is the always-on fallback. See:\n // mcp-revisions/output-shaping/06-source-type-tagging-without-llm.md.\n const llmTag = llmTagsByRank?.get(row.rank);\n const heuristic = classifySourceByUrl(row.url);\n return {\n rank: row.rank,\n url: row.url,\n title: row.title,\n snippet: row.snippet,\n source_type: ((llmTag as typeof heuristic) ?? heuristic),\n score: Number(row.score.toFixed(2)),\n seen_in: row.frequency,\n best_position: row.bestPosition,\n };\n });\n}\n\n// --- Error builder ---\n\nfunction buildWebSearchError(\n error: unknown,\n params: WebSearchParams,\n startTime: number,\n): ToolExecutionResult<WebSearchOutput> {\n const structuredError = classifyError(error);\n const executionTime = Date.now() - startTime;\n\n mcpLog('error', `web-search: ${structuredError.message}`, 'search');\n\n const errorContent = formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'web-search',\n howToFix: ['Verify SERPER_API_KEY is set correctly'],\n alternatives: [\n 'web-search(queries=[\"topic recommendations\"], extract=\"...\", scope: \"reddit\") \u2014 Reddit-only post permalinks via the same backend',\n 'scrape-links(urls=[...], extract=\"...\") \u2014 if you have URLs from prior steps, scrape them now',\n ],\n });\n\n return toolFailure(\n `${errorContent}\\n\\nExecution time: ${formatDuration(executionTime)}\\nQueries: ${params.queries.length}`,\n );\n}\n\n// --- Main handler ---\n\nexport async function handleWebSearch(\n params: WebSearchParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<WebSearchOutput>> {\n const startTime = Date.now();\n\n try {\n const effectiveQueries = decorateQueriesForScope(params.queries, params.scope);\n if (params.scope !== 'web') {\n mcpLog('info', `Searching scope=${params.scope}: ${params.queries.length} input queries \u2192 ${effectiveQueries.length} dispatched`, 'search');\n } else {\n mcpLog('info', `Searching for ${params.queries.length} query/queries`, 'search');\n }\n await reporter.log('info', `Searching for ${effectiveQueries.length} query/queries (scope=${params.scope})`);\n await reporter.progress(15, 100, 'Submitting search queries');\n\n const rawResponse = await executeSearches(effectiveQueries);\n const response = filterScopedSearches(rawResponse, params.scope);\n await reporter.progress(50, 100, 'Collected search results');\n\n const { aggregation } = processResults(response);\n await reporter.log(\n 'info',\n `Collected ${aggregation.totalUniqueUrls} unique URLs across ${response.totalQueries} queries`,\n );\n\n // Decide: raw output or LLM classification\n const useRaw = params.raw;\n const llmProcessor = createLLMProcessor();\n\n let markdown: string;\n let llmClassified = false;\n let llmError: string | undefined;\n\n if (useRaw || !llmProcessor) {\n // Raw path: traditional unified ranked list\n if (!useRaw && !llmProcessor) {\n llmError = 'LLM unavailable (LLM_EXTRACTION_API_KEY not set). 
Falling back to raw output.';\n mcpLog('warning', llmError, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', 'llm_classifier_unreachable: planner not configured; raw ranked list returned');\n }\n let rawRefineQueries: RefineQuerySuggestion[] | undefined;\n if (useRaw && llmProcessor) {\n const refineResult = await suggestRefineQueriesForRawMode(\n aggregation.rankedUrls,\n params.extract,\n params.queries,\n llmProcessor,\n );\n rawRefineQueries = refineResult.result;\n }\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n rawRefineQueries,\n { includeSignals: params.verbose },\n );\n await reporter.progress(80, 100, 'Ranking search results');\n } else {\n // LLM classification path\n await reporter.progress(65, 100, 'Classifying results by relevance');\n const classification = await classifySearchResults(\n aggregation.rankedUrls,\n params.extract,\n response.totalQueries,\n llmProcessor,\n params.queries,\n );\n\n if (classification.result) {\n markdown = buildClassifiedOutput(\n classification.result, aggregation, params.extract, response.searches, response.totalQueries, params.verbose,\n );\n llmClassified = true;\n await reporter.progress(85, 100, 'Formatted classified results');\n } else {\n // Classification failed \u2014 fall back to raw\n llmError = classification.error ?? 'Unknown classification error';\n mcpLog('warning', `Classification failed, falling back to raw: ${llmError}`, 'search');\n // mcp-revisions/llm-degradation/01: surface degraded mode to the client.\n await reporter.log('warning', `llm_classifier_unreachable: ${llmError}`);\n markdown = appendSignalsAndFollowUps(\n buildRawOutput(params.queries, aggregation, response.searches, params.verbose),\n buildSignalsSection(aggregation, response.searches, response.totalQueries),\n undefined,\n { includeSignals: params.verbose },\n );\n await reporter.progress(85, 100, 'Classification failed, using raw output');\n }\n }\n\n const executionTime = Date.now() - startTime;\n const metadata = buildMetadata(\n aggregation, executionTime, response.totalQueries, response.searches, llmClassified, params.scope, llmError,\n );\n\n // Build per-row structured results so capability-aware clients can\n // index into `structuredContent.results` rather than regex-scrape the\n // markdown table. The LLM tag wins when present; heuristic is the\n // always-on fallback.\n const llmTagsByRank = new Map<number, string>();\n // (When classification succeeds the source_type per-row is populated\n // inside buildClassifiedOutput via the entry.source_type field \u2014 but\n // we don't have a direct handle on it here without a refactor. The\n // heuristic alone covers the structuredContent shape correctly; the\n // LLM-tagged variant remains in the markdown body.)\n const results = buildStructuredResults(aggregation, llmTagsByRank);\n\n mcpLog('info', `Search completed: ${aggregation.rankedUrls.length} URLs, classified=${llmClassified}`, 'search');\n await reporter.log('info', `Search completed with ${aggregation.rankedUrls.length} URLs (classified: ${llmClassified})`);\n\n const footer = `\\n---\\n*${formatDuration(executionTime)} | ${aggregation.totalUniqueUrls} unique URLs${llmClassified ? 
' | LLM classified' : ''}*`;\n const fullMarkdown = markdown + footer;\n\n return toolSuccess(fullMarkdown, { results, metadata });\n } catch (error) {\n return buildWebSearchError(error, params, startTime);\n }\n}\n\nexport function registerWebSearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'web-search',\n title: 'Web Search',\n description:\n 'Fan out Google queries in parallel. One call carries up to 50 queries in a flat `queries` array \u2014 pack diverse facets (not paraphrases) into a single call. Call me AGGRESSIVELY across a session: 2\u20134 rounds is normal, 1 is underuse. After each pass, read `gaps[]` + `refine_queries[]` and fire another round with the new terms. Safe to call multiple times in parallel in the same turn for orthogonal subtopics. `scope`: `\"reddit\"` (server appends `site:reddit.com` + filters to post permalinks \u2014 use for sentiment / migration / lived experience), `\"web\"` default (spec / bug / pricing / CVE / API), `\"both\"` (fan each query across both \u2014 use when opinion-heavy AND needs official sources). Returns a tiered Markdown report (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis with `[rank]` citations + `## Gaps` + `## Suggested follow-up searches` tied to gap ids. Set `raw=true` to skip classification.',\n schema: webSearchParamsSchema,\n outputSchema: webSearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().search) {\n return toToolResponse(toolFailure(getMissingEnvMessage('search')));\n }\n\n const reporter = createToolReporter(ctx, 'web-search');\n const result = await handleWebSearch(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Search failed' : 'Search complete');\n return toToolResponse(result);\n },\n );\n}\n"],
5
+ "mappings": "AAOA,SAAS,iBAAiB,4BAA4B;AACtD;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,oBAAoB;AAC7B;AAAA,EACE;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OAIK;AACP,SAAS,qBAAqB;AAC9B,SAAS,2BAA2B;AACpC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AACP,SAAS,0BAA0B;AAqBnC,MAAM,wBAAwB;AAC9B,MAAM,cAAc;AAEpB,SAAS,wBAAwB,SAAmB,OAA4C;AAC9F,MAAI,UAAU,MAAO,QAAO;AAC5B,QAAM,WAAW,QAAQ;AAAA,IAAI,CAAC,MAC5B,wBAAwB,KAAK,CAAC,IAAI,IAAI,GAAG,CAAC;AAAA,EAC5C;AACA,SAAO,UAAU,WAAW,WAAW,CAAC,GAAG,SAAS,GAAG,QAAQ;AACjE;AAEA,eAAe,gBAAgB,SAA4C;AACzE,QAAM,SAAS,IAAI,aAAa;AAChC,SAAO,OAAO,eAAe,OAAO;AACtC;AAEA,SAAS,qBACP,UACA,OACgB;AAChB,MAAI,UAAU,MAAO,QAAO;AAC5B,QAAM,WAAW,SAAS,SAAS,IAAI,CAAC,YAAY;AAAA,IAClD,GAAG;AAAA,IACH,SAAS,OAAO,QAAQ,OAAO,CAAC,MAAM;AACpC,UAAI;AACJ,UAAI;AAAE,eAAO,IAAI,IAAI,EAAE,IAAI,EAAE;AAAA,MAAU,QAAQ;AAAE,eAAO;AAAA,MAAM;AAE9D,UAAI,CAAC,YAAY,KAAK,IAAI,EAAG,QAAO,UAAU;AAC9C,aAAO,sBAAsB,KAAK,EAAE,IAAI;AAAA,IAC1C,CAAC;AAAA,EACH,EAAE;AACF,SAAO,EAAE,GAAG,UAAU,UAAU,SAAS;AAC3C;AAEA,SAAS,eAAe,UAEtB;AACA,QAAM,cAAc,iBAAiB,SAAS,UAAU,CAAC;AACzD,SAAO,EAAE,YAAY;AACvB;AAIA,SAAS,eACP,SACA,aACA,UACA,UAAmB,OACX;AACR,SAAO;AAAA,IACL,YAAY;AAAA,IAAY;AAAA,IAAS;AAAA,IACjC,YAAY;AAAA,IACZ,YAAY;AAAA,IAAoB,YAAY;AAAA,IAC5C;AAAA,EACF;AACF;AAEA,SAAS,oBACP,aACA,UACA,cACQ;AACR,QAAM,gBAAgB,SAAS,OAAO,CAAC,WAAW,OAAO,QAAQ,UAAU,CAAC,EAAE;AAC9E,QAAM,WAAW,SACd,OAAO,CAAC,WAAW,OAAO,QAAQ,UAAU,CAAC,EAC7C,IAAI,CAAC,WAAW,IAAI,OAAO,KAAK,GAAG;AACtC,QAAM,iBAAiB,YAAY,WAAW,OAAO,CAAC,QAAQ,IAAI,WAAW,EAAE;AAE/E,QAAM,QAAQ;AAAA,IACZ;AAAA,IACA,eAAe,aAAa,IAAI,YAAY;AAAA,IAC5C,qBAAqB,cAAc;AAAA,EACrC;AAEA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,gBAAgB,SAAS,KAAK,IAAI,CAAC,EAAE;AAAA,EAClD;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEO,SAAS,+BACd,eACQ;AACR,MAAI,CAAC,iBAAiB,cAAc,WAAW,GAAG;AAChD,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,CAAC,mCAAmC,EAAE;AAEpD,aAAW,QAAQ,eAAe;AAChC,UAAM,QAAQ,mBAAmB,KAAK,SAAS,EAAE;AACjD,QAAI,CAAC,MAAO;AACZ,UAAM,YAAY,mBAAmB,KAAK,aAAa,EAAE;AACzD,UAAM,SAAS,OAAO,KAAK,WAAW,WAClC,kBAAkB,KAAK,MAAM,QAC7B,KAAK,kBACH,MAAM,mBAAmB,KAAK,eAAe,CAAC,OAC9C;AACN,UAAM;AAAA,MAAK,YACP,KAAK,KAAK,WAAM,SAAS,GAAG,MAAM,KAClC,KAAK,KAAK,GAAG,MAAM;AAAA,IACvB;AAAA,EACF;AAEA,SAAO,MAAM,WAAW,IAAI,KAAK,MAAM,KAAK,IAAI;AAClD;AAEO,SAAS,0BACd,UACA,gBACA,eACA,UAAwC,CAAC,GACjC;AACR,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,WAAW,CAAC,QAAQ;AAC1B,MAAI,kBAAkB,gBAAgB;AACpC,aAAS,KAAK,IAAI,OAAO,cAAc;AAAA,EACzC;AACA,QAAM,YAAY,+BAA+B,aAAa;AAC9D,MAAI,WAAW;AACb,aAAS,KAAK,IAAI,SAAS;AAAA,EAC7B;AACA,SAAO,SAAS,KAAK,IAAI;AAC3B;AAYA,MAAM,iBAAiB;AACvB,MAAM,iBAAiB;AAchB,SAAS,sBACd,OACA,aACA,OAAuC,CAAC,GAChC;AACR,QAAM,MAAM,KAAK,OAAO;AACxB,QAAM,MAAM,KAAK,OAAO;AAExB,QAAM,QAA8F,CAAC;AAErG,aAAW,aAAa,MAAM,MAAM;AAClC,QAAI,MAAM,UAAU,IAAK;AACzB,UAAM,KAAK,EAAE,WAAW,MAAM,kBAAkB,CAAC;AAAA,EACnD;AAEA,MAAI,MAAM,SAAS,KAAK;AACtB,UAAM,SAAS,KAAK,IAAI,KAAK,GAAG;AAChC,eAAW,aAAa,MAAM,OAAO;AACnC,UAAI,MAAM,UAAU,OAAQ;AAC5B,YAAM,KAAK,EAAE,WAAW,MAAM,iBAAiB,CAAC;AAAA,IAClD;AAAA,EACF;AAEA,MAAI,MAAM,WAAW,EAAG,QAAO;AAE/B,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,uDAAkD;AAC7D,QAAM,QAAQ,CAAC,MAAM,MAAM;AACzB,UAAM,QAAQ,YAAY,IAAI,KAAK,UAAU,IAAI;AACjD,UAAM,SAAS,OAAO,UAAU,MAAM,OAAO,KAAK,EAAE,SAAS,IAAI,MAAM,SAAS;AAChF,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,KAAK,UAAU,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACpE,QAAQ;AACN,eAAS,KAAK,UAAU;AAAA,IAC1B;AACA,UAAM;AAAA,MACJ,GAAG,IAAI,CAAC,QAAQ,KAAK,UAAU,KAAK,KAAK,KAAK,UAAU,GAAG,cAAS,MAAM,WAAM,MAAM,MAAM,KAAK,IAAI,UAAU,KAAK,UAAU,IAAI;AAAA,IACpI;AAAA,EACF,CAAC;AACD,SAAO,MAAM,KAAK,IAAI;AACxB;AAIA,SAAS,sBACP,gBACA,aACA,SACA,UACA,cACA,UAAmB,OACX;AACR,QAAM,aAAa,YAAY;AAG/B,QAAM
,cAAc,IAAI,IAAI,eAAe,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;AAE1E,QAAM,QAAQ;AAAA,IACZ,MAAM,CAAC;AAAA,IACP,OAAO,CAAC;AAAA,IACR,OAAO,CAAC;AAAA,EACV;AAEA,aAAW,OAAO,YAAY;AAC5B,UAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,UAAM,OAAO,OAAO;AACpB,QAAI,SAAS,mBAAmB;AAC9B,YAAM,KAAK,KAAK,GAAG;AAAA,IACrB,WAAW,SAAS,kBAAkB;AACpC,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB,OAAO;AACL,YAAM,MAAM,KAAK,GAAG;AAAA,IACtB;AAAA,EACF;AAEA,QAAM,QAAkB,CAAC;AAGzB,QAAM,KAAK,MAAM,eAAe,KAAK,EAAE;AACvC,QAAM,KAAK,kBAAkB,OAAO,EAAE;AACtC,QAAM,KAAK,KAAK,YAAY,mBAAc,WAAW,MAAM,gBAAW,MAAM,KAAK,MAAM,qBAAqB,MAAM,MAAM,MAAM,oBAAoB;AAClJ,MAAI,eAAe,YAAY;AAC7B,UAAM,aAAa,eAAe,oBAAoB,WAAM,eAAe,iBAAiB,KAAK;AACjG,UAAM,KAAK,mBAAmB,eAAe,UAAU,KAAK,UAAU,EAAE;AAAA,EAC1E;AACA,QAAM,KAAK,EAAE;AAIb,QAAM,YAAY;AAAA,IAChB,EAAE,MAAM,MAAM,MAAM,OAAO,MAAM,MAAM;AAAA,IACvC;AAAA,EACF;AACA,MAAI,WAAW;AACb,UAAM,KAAK,SAAS;AACpB,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,gBAAgB,eAAe,SAAS,EAAE;AACrD,QAAM,KAAK,EAAE;AAGb,QAAM,gBAAgB,CAAC,QAA2C;AAChE,UAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,UAAM,cAAc,KAAK,MAAM,IAAI,gBAAgB,GAAG;AACtD,UAAM,SAAS,GAAG,IAAI,SAAS,IAAI,YAAY,KAAK,WAAW;AAC/D,UAAM,aAAa,OAAO,cAAc,KAAK,MAAM,WAAW,OAAO;AACrE,UAAM,SAAS,OAAO,SAAS,MAAM,OAAO,QAAQ,OAAO,KAAK,IAAI;AACpE,WAAO,KAAK,IAAI,IAAI,OAAO,IAAI,KAAK,KAAK,IAAI,GAAG,OAAO,UAAU,MAAM,MAAM,MAAM,MAAM;AAAA,EAC3F;AAGA,MAAI,MAAM,KAAK,SAAS,GAAG;AACzB,UAAM,KAAK,wBAAwB,MAAM,KAAK,MAAM,GAAG;AACvD,UAAM,KAAK,sCAAsC;AACjD,UAAM,KAAK,sCAAsC;AACjD,eAAW,OAAO,MAAM,KAAM,OAAM,KAAK,cAAc,GAAG,CAAC;AAC3D,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,uBAAuB,MAAM,MAAM,MAAM,GAAG;AACvD,UAAM,KAAK,sCAAsC;AACjD,UAAM,KAAK,sCAAsC;AACjD,eAAW,OAAO,MAAM,MAAO,OAAM,KAAK,cAAc,GAAG,CAAC;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAGA,MAAI,MAAM,MAAM,SAAS,GAAG;AAC1B,UAAM,KAAK,sBAAsB,MAAM,MAAM,MAAM,GAAG;AACtD,UAAM,KAAK,wCAAwC;AACnD,UAAM,KAAK,wCAAwC;AACnD,eAAW,OAAO,MAAM,OAAO;AAC7B,YAAM,QAAQ,YAAY,IAAI,IAAI,IAAI;AACtC,YAAM,YAAY,IAAI,QAAQ,IAAI,CAAC,MAAM,IAAI,CAAC,GAAG,EAAE,KAAK,IAAI;AAC5D,YAAM,aAAa,OAAO,cAAc,KAAK,MAAM,WAAW,OAAO;AACrE,UAAI;AACJ,UAAI;AACF,iBAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MACzD,QAAQ;AACN,iBAAS,IAAI;AAAA,MACf;AACA,YAAM,KAAK,KAAK,IAAI,IAAI,MAAM,MAAM,MAAM,UAAU,MAAM,IAAI,MAAM,QAAQ,CAAC,CAAC,MAAM,SAAS,IAAI;AAAA,IACnG;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAKA,MAAI,SAAS;AACX,UAAM,KAAK,oBAAoB,aAAa,UAAU,YAAY,CAAC;AAAA,EACrE;AAGA,MAAI,eAAe,QAAQ,eAAe,KAAK,SAAS,GAAG;AACzD,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,SAAS;AACpB,eAAW,OAAO,eAAe,MAAM;AACrC,YAAM,KAAK,QAAQ,IAAI,EAAE,OAAO,IAAI,WAAW,EAAE;AAAA,IACnD;AAAA,EACF;AAEA,QAAM,YAAY,+BAA+B,eAAe,cAAc;AAC9E,MAAI,WAAW;AACb,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,SAAS;AAAA,EACtB;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAIA,SAAS,cACP,aACA,eACA,cACA,UACA,eACA,OACA,UACA;AACA,QAAM,kBAAkB,SAAS,IAAI,OAAK;AACxC,QAAI;AACJ,UAAM,YAAY,EAAE,QAAQ,CAAC;AAC7B,QAAI,WAAW;AACb,UAAI;AAAE,oBAAY,IAAI,IAAI,UAAU,IAAI,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IACnG;AACA,WAAO,EAAE,OAAO,EAAE,OAAO,cAAc,EAAE,QAAQ,QAAQ,SAAS,UAAU;AAAA,EAC9E,CAAC;AACD,QAAM,kBAAkB,SACrB,OAAO,OAAK,EAAE,QAAQ,UAAU,CAAC,EACjC,IAAI,OAAK,EAAE,KAAK;AAEnB,SAAO;AAAA,IACL,aAAa;AAAA,IACb,YAAY,YAAY,WAAW;AAAA,IACnC,QAAQ,eAAe,SAAS,OAAO,OAAK,EAAE,QAAQ,SAAS,CAAC,EAAE;AAAA,IAClE,mBAAmB;AAAA,IACnB,gBAAgB;AAAA,IAChB;AAAA,IACA,GAAI,WAAW,EAAE,WAAW,SAAS,IAAI,CAAC;AAAA,IAC1C,kBAAkB;AAAA,IAClB,GAAI,gBAAgB,SAAS,IAAI,EAAE,mBAAmB,gBAAgB,IAAI,CAAC;AAAA,EAC7E;AACF;AAEA,SAAS,uBACP,aACA,eAUC;AACD,SAAO,YAAY,WAAW,IAAI,CAAC,QAAQ;AAGzC,UAAM,SAAS,eAAe,IAAI,IAAI,IAAI;AAC1C,UAAM,YAAY,oBAAoB,IAAI,GAAG;AAC7C,WAAO;AAAA,MACL,MAAM,IAAI;AAAA,MACV,KAAK,IAAI;AAAA,MACT,OAAO,IAAI;AAAA,MACX,SAAS,IAAI;AAAA,MACb,aA
Ae,UAA+B;AAAA,MAC9C,OAAO,OAAO,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,MAClC,SAAS,IAAI;AAAA,MACb,eAAe,IAAI;AAAA,IACrB;AAAA,EACF,CAAC;AACH;AAIA,SAAS,oBACP,OACA,QACA,WACsC;AACtC,QAAM,kBAAkB,cAAc,KAAK;AAC3C,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC,SAAO,SAAS,eAAe,gBAAgB,OAAO,IAAI,QAAQ;AAElE,QAAM,eAAe,YAAY;AAAA,IAC/B,MAAM,gBAAgB;AAAA,IACtB,SAAS,gBAAgB;AAAA,IACzB,WAAW,gBAAgB;AAAA,IAC3B,UAAU;AAAA,IACV,UAAU,CAAC,wCAAwC;AAAA,IACnD,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL,GAAG,YAAY;AAAA;AAAA,kBAAuB,eAAe,aAAa,CAAC;AAAA,WAAc,OAAO,QAAQ,MAAM;AAAA,EACxG;AACF;AAIA,eAAsB,gBACpB,QACA,WAAyB,eACsB;AAC/C,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI;AACF,UAAM,mBAAmB,wBAAwB,OAAO,SAAS,OAAO,KAAK;AAC7E,QAAI,OAAO,UAAU,OAAO;AAC1B,aAAO,QAAQ,mBAAmB,OAAO,KAAK,KAAK,OAAO,QAAQ,MAAM,yBAAoB,iBAAiB,MAAM,eAAe,QAAQ;AAAA,IAC5I,OAAO;AACL,aAAO,QAAQ,iBAAiB,OAAO,QAAQ,MAAM,kBAAkB,QAAQ;AAAA,IACjF;AACA,UAAM,SAAS,IAAI,QAAQ,iBAAiB,iBAAiB,MAAM,yBAAyB,OAAO,KAAK,GAAG;AAC3G,UAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAE5D,UAAM,cAAc,MAAM,gBAAgB,gBAAgB;AAC1D,UAAM,WAAW,qBAAqB,aAAa,OAAO,KAAK;AAC/D,UAAM,SAAS,SAAS,IAAI,KAAK,0BAA0B;AAE3D,UAAM,EAAE,YAAY,IAAI,eAAe,QAAQ;AAC/C,UAAM,SAAS;AAAA,MACb;AAAA,MACA,aAAa,YAAY,eAAe,uBAAuB,SAAS,YAAY;AAAA,IACtF;AAGA,UAAM,SAAS,OAAO;AACtB,UAAM,eAAe,mBAAmB;AAExC,QAAI;AACJ,QAAI,gBAAgB;AACpB,QAAI;AAEJ,QAAI,UAAU,CAAC,cAAc;AAE3B,UAAI,CAAC,UAAU,CAAC,cAAc;AAC5B,mBAAW;AACX,eAAO,WAAW,UAAU,QAAQ;AAEpC,cAAM,SAAS,IAAI,WAAW,8EAA8E;AAAA,MAC9G;AACA,UAAI;AACJ,UAAI,UAAU,cAAc;AAC1B,cAAM,eAAe,MAAM;AAAA,UACzB,YAAY;AAAA,UACZ,OAAO;AAAA,UACP,OAAO;AAAA,UACP;AAAA,QACF;AACA,2BAAmB,aAAa;AAAA,MAClC;AACA,iBAAW;AAAA,QACT,eAAe,OAAO,SAAS,aAAa,SAAS,UAAU,OAAO,OAAO;AAAA,QAC7E,oBAAoB,aAAa,SAAS,UAAU,SAAS,YAAY;AAAA,QACzE;AAAA,QACA,EAAE,gBAAgB,OAAO,QAAQ;AAAA,MACnC;AACA,YAAM,SAAS,SAAS,IAAI,KAAK,wBAAwB;AAAA,IAC3D,OAAO;AAEL,YAAM,SAAS,SAAS,IAAI,KAAK,kCAAkC;AACnE,YAAM,iBAAiB,MAAM;AAAA,QAC3B,YAAY;AAAA,QACZ,OAAO;AAAA,QACP,SAAS;AAAA,QACT;AAAA,QACA,OAAO;AAAA,MACT;AAEA,UAAI,eAAe,QAAQ;AACzB,mBAAW;AAAA,UACT,eAAe;AAAA,UAAQ;AAAA,UAAa,OAAO;AAAA,UAAS,SAAS;AAAA,UAAU,SAAS;AAAA,UAAc,OAAO;AAAA,QACvG;AACA,wBAAgB;AAChB,cAAM,SAAS,SAAS,IAAI,KAAK,8BAA8B;AAAA,MACjE,OAAO;AAEL,mBAAW,eAAe,SAAS;AACnC,eAAO,WAAW,+CAA+C,QAAQ,IAAI,QAAQ;AAErF,cAAM,SAAS,IAAI,WAAW,+BAA+B,QAAQ,EAAE;AACvE,mBAAW;AAAA,UACT,eAAe,OAAO,SAAS,aAAa,SAAS,UAAU,OAAO,OAAO;AAAA,UAC7E,oBAAoB,aAAa,SAAS,UAAU,SAAS,YAAY;AAAA,UACzE;AAAA,UACA,EAAE,gBAAgB,OAAO,QAAQ;AAAA,QACnC;AACA,cAAM,SAAS,SAAS,IAAI,KAAK,yCAAyC;AAAA,MAC5E;AAAA,IACF;AAEA,UAAM,gBAAgB,KAAK,IAAI,IAAI;AACnC,UAAM,WAAW;AAAA,MACf;AAAA,MAAa;AAAA,MAAe,SAAS;AAAA,MAAc,SAAS;AAAA,MAAU;AAAA,MAAe,OAAO;AAAA,MAAO;AAAA,IACrG;AAMA,UAAM,gBAAgB,oBAAI,IAAoB;AAM9C,UAAM,UAAU,uBAAuB,aAAa,aAAa;AAEjE,WAAO,QAAQ,qBAAqB,YAAY,WAAW,MAAM,qBAAqB,aAAa,IAAI,QAAQ;AAC/G,UAAM,SAAS,IAAI,QAAQ,yBAAyB,YAAY,WAAW,MAAM,sBAAsB,aAAa,GAAG;AAEvH,UAAM,SAAS;AAAA;AAAA,GAAW,eAAe,aAAa,CAAC,MAAM,YAAY,eAAe,eAAe,gBAAgB,sBAAsB,EAAE;AAC/I,UAAM,eAAe,WAAW;AAEhC,WAAO,YAAY,cAAc,EAAE,SAAS,SAAS,CAAC;AAAA,EACxD,SAAS,OAAO;AACd,WAAO,oBAAoB,OAAO,QAAQ,SAAS;AAAA,EACrD;AACF;AAEO,SAAS,sBAAsB,QAAyB;AAC7D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,QAAQ;AAC7B,eAAO,eAAe,YAAY,qBAAqB,QAAQ,CAAC,CAAC;AAAA,MACnE;AAEA,YAAM,WAAW,mBAAmB,KAAK,YAAY;AACrD,YAAM,SAAS,MAAM,gBAAgB,MAAM,QAAQ;AAEnD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
  "names": []
  }
package/dist/src/tools/start-research.js CHANGED
@@ -130,7 +130,7 @@ async function handleStartResearch(params, signal) {
  const plannerKnownOffline = isPlannerKnownOffline(llmHealth);
  if (plannerKnownOffline && !params.include_playbook) {
  const stub = buildDegradedStub(params.goal);
- return toolSuccess(stub, { content: stub });
+ return toolSuccess(stub);
  }
  const scaffolding = buildStaticScaffolding(params.goal, {
  plannerAvailable: !plannerKnownOffline
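
Both hunks in this file sit inside `handleStartResearch`. This first one is the planner-offline branch: when the gate is closed and the caller has not opted in, the tool answers with the compact `buildDegradedStub` output instead of the full playbook. The escape hatch is `include_playbook: true`. A sketch of that opt-in from an MCP client follows; the endpoint URL, client name, and goal string are illustrative, while `Client`, `StreamableHTTPClientTransport`, and `callTool` are the standard `@modelcontextprotocol/sdk` client APIs.

```ts
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';

// Illustrative endpoint — point this at wherever the server actually listens.
const client = new Client({ name: 'playbook-example', version: '0.0.0' });
await client.connect(new StreamableHTTPClientTransport(new URL('http://127.0.0.1:3000/mcp')));

// include_playbook: true skips the buildDegradedStub branch above, returning
// the full static scaffolding even while the planner-offline gate is closed.
const result = await client.callTool({
  name: 'start-research',
  arguments: {
    goal: 'compare hosted vector databases for a RAG workload',
    include_playbook: true,
  },
});
```
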
@@ -145,7 +145,7 @@ async function handleStartResearch(params, signal) {
  ---
 
  ${brief}` : `${scaffolding}${briefFallbackNote}`;
- return toolSuccess(content, { content });
+ return toolSuccess(content);
  } catch (err) {
  const structuredError = classifyError(err);
  mcpLog("error", `start-research: ${structuredError.message}`, "start-research");
package/dist/src/tools/start-research.js.map CHANGED
@@ -1,7 +1,7 @@
  {
  "version": 3,
  "sources": ["../../../src/tools/start-research.ts"],
- "sourcesContent": ["import type { MCPServer } from 'mcp-use/server';\n\nimport {\n startResearchOutputSchema,\n startResearchParamsSchema,\n type StartResearchOutput,\n type StartResearchParams,\n} from '../schemas/start-research.js';\nimport {\n createLLMProcessor,\n generateResearchBrief,\n getLLMHealth,\n renderResearchBrief,\n type LLMHealthSnapshot,\n} from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\nimport { toolFailure, toolSuccess, toToolResponse, type ToolExecutionResult } from './mcp-helpers.js';\nimport { formatError } from './utils.js';\n\nconst SKILL_INSTALL_HINT = [\n '> \uD83D\uDCA1 **Pair this server with the `run-research` skill** for the full agentic playbook',\n '> (single-agent loop, multi-agent orchestrator, mission-prompt templates, output discipline).',\n '> Install once per machine \u2014 the skill is what teaches the agent how to spend these tools well:',\n '>',\n '> ```bash',\n '> npx -y skills add -y -g yigitkonur/skills-by-yigitkonur/skills/run-research',\n '> ```',\n '>',\n '> Already installed? Skip this \u2014 the skill auto-loads on relevant prompts. The full pack',\n '> ships ~50 sibling skills: `npx -y skills add -y -g yigitkonur/skills-by-yigitkonur`.',\n].join('\\n');\n\n/**\n * Full research-loop playbook. Teaches the 3-tool mental model\n * (start-research, web-search, scrape-links), the aggressive multi-call\n * discipline, parallel-callability, and the cite-from-scrape rule.\n *\n * Emitted when the LLM planner is healthy OR `include_playbook: true`.\n */\nexport function buildStaticScaffolding(goal?: string, opts: { plannerAvailable?: boolean } = {}): string {\n const plannerAvailable = opts.plannerAvailable ?? true;\n const focusLine = goal\n ? `> Focus for this session: ${goal}`\n : '> Focus for this session: not yet specified \u2014 set one on the next pass';\n\n const classifierLoopStep = plannerAvailable\n ? '3. Read the classifier output: `synthesis` (citations in `[rank]`), `gaps[]` (with ids), `refine_queries[]` (follow-ups tied to gap ids).'\n : '3. Classifier output is NOT available (LLM planner offline). `web-search` returns a raw ranked list \u2014 synthesize the terrain yourself from titles + snippets.';\n\n return [\n '# Research session started',\n '',\n SKILL_INSTALL_HINT,\n '',\n focusLine,\n '',\n 'You are running a research LOOP, not answering from memory. Training data is stale; the web is authoritative for anything dated, versioned, priced, or contested. Every non-trivial claim in your final answer must be traceable to a `scrape-links` excerpt you read. Never cite a URL from a `web-search` snippet alone.',\n '',\n '## The 3 tools',\n '',\n '**1. `start-research`** \u2014 you just called me. I plan this session and return the brief below. Call me again only if the goal materially shifts.',\n '',\n '**2. `web-search`** \u2014 fan out Google queries in parallel. One call carries **up to 50 queries** in a flat `queries` array. Call me **aggressively** \u2014 2\u20134 rounds per session is normal, not 1. After each pass, read `gaps[]` and `refine_queries[]` and fire another round with the harvested terms. **Parallel-safe**: run multiple `web-search` calls in the same turn for orthogonal subtopics (e.g. one call for \"spec\" queries, one call for \"sentiment\" queries). `scope` values:',\n '- `\"reddit\"` \u2192 server appends `site:reddit.com` and filters to post permalinks. 
Use for sentiment / migration / lived experience.',\n '- `\"web\"` (default) \u2192 open web. Use for spec / bug / pricing / CVE / API / primary-source hunts.',\n '- `\"both\"` \u2192 fans each query across both. Use when the topic is opinion-heavy AND needs official sources.',\n '',\n '**3. `scrape-links`** \u2014 fetch URLs in parallel and run per-URL LLM extraction. **Auto-detects** `reddit.com/r/.../comments/` permalinks and routes them through the Reddit API (threaded post + comments); everything else flows through the HTTP scraper. Mix Reddit + web URLs freely \u2014 both branches run concurrently. **Parallel-safe**: prefer multiple `scrape-links` calls with contextually grouped URLs over one giant mixed batch. Each page returns `## Source`, `## Matches` (verbatim facts), `## Not found` (explicit gaps this page did NOT answer), `## Follow-up signals` (new terms + referenced-but-unscraped URLs that seed your next `web-search` round). Describe extraction SHAPE in `extract`, facets separated by `|`: `root cause | affected versions | fix | workarounds | timeline`.',\n '',\n '## The loop',\n '',\n '1. Read the brief below (if present). Note `primary_branch`, `keyword_seeds`, `gaps_to_watch`, `stop_criteria`.',\n '2. Fire `first_call_sequence` in order. For `primary_branch: reddit`, lead with `web-search scope:\"reddit\"` \u2192 `scrape-links` on the best post permalinks. For `web`, lead with `web-search scope:\"web\"` \u2192 `scrape-links` on HIGHLY_RELEVANT URLs. For `both`, issue two parallel `web-search` calls (one per scope) in the same turn, then one merged `scrape-links`.',\n classifierLoopStep,\n '4. Scrape every HIGHLY_RELEVANT plus the 2\u20133 best MAYBE_RELEVANT. Group URLs into parallel `scrape-links` calls when contexts differ (e.g. one call for docs, one for reddit threads).',\n '5. Harvest from each scrape extract\\'s `## Follow-up signals` \u2014 new terms, version numbers, vendor names, failure modes, referenced URLs. These seed your next `web-search` round.',\n '6. Fire the next `web-search` round with the harvested terms plus any `refine_queries[]` the classifier suggested. Do NOT paraphrase queries already run \u2014 the classifier tracks them.',\n '7. **Stop** when every `gaps_to_watch` item is closed AND the last `web-search` pass surfaced no new terms, OR when you have completed 4 full passes. State remaining gaps explicitly if you hit the cap.',\n '',\n '## Output discipline',\n '',\n '- Cite URL (or Reddit permalink) for every non-trivial claim \u2014 only from a `scrape-links` excerpt you read.',\n '- Quote verbatim: numbers, versions, API names, prices, error messages, stacktraces, people\\'s words.',\n '- Separate documented facts from inferred conclusions explicitly.',\n '- Include the scrape date for time-sensitive claims.',\n '- If you could not verify something, say so \u2014 do not paper over gaps.',\n '- Never cite a URL from a search snippet alone.',\n '',\n '## Post-cutoff discipline',\n '',\n 'For anything released / changed after your training cutoff \u2014 new products, versions, prices, benchmarks \u2014 treat your own query suggestions as hypotheses until a scraped first-party page confirms them. Include `site:<vendor-domain>` queries in your first `web-search` call when the goal names a vendor or product.',\n ].join('\\n');\n}\n\n/**\n * Compact stub emitted when the LLM planner is offline AND the caller did\n * not opt into the full playbook. 
Names the 3 tools, the loop, parallel-safety,\n * Reddit branch, and cite-from-scrape \u2014 enough to keep an agent moving.\n */\nexport function buildDegradedStub(goal?: string): string {\n const focusLine = goal\n ? `> Focus for this session: ${goal}`\n : '> Focus for this session: not specified \u2014 set one on the next pass.';\n return [\n '# Research session started (LLM planner offline \u2014 compact stub)',\n '',\n SKILL_INSTALL_HINT,\n '',\n focusLine,\n '',\n '**3 tools**: `start-research` (plans), `web-search` (Google fan-out, up to 50 queries/call, `scope: web|reddit|both`), `scrape-links` (fetch URLs in parallel, auto-detects `reddit.com/r/.../comments/` permalinks \u2192 Reddit API; all other URLs \u2192 HTTP scraper). All three are **parallel-callable** \u2014 fire multiple in the same turn when subtopics are orthogonal.',\n '',\n '**Loop**: `web-search` \u2192 `scrape-links` \u2192 read `## Follow-up signals` \u2192 harvest new terms \u2192 next `web-search` round \u2192 stop when gaps close OR after 4 passes. Call `web-search` aggressively (2\u20134 rounds, not 1).',\n '',\n '**Reddit branch**: use `web-search scope:\"reddit\"` for sentiment / migration / lived experience. Skip for CVE / API spec / pricing. Reddit permalinks go straight into `scrape-links` for threaded post + comments.',\n '',\n '**Cite**: every non-trivial claim must trace to a `scrape-links` excerpt, never a search snippet. Quote verbatim for numbers, versions, stacktraces, people\\'s words.',\n '',\n 'Pass `include_playbook: true` to `start-research` for the full tactic reference.',\n ].join('\\n');\n}\n\n/**\n * Backward-compat alias \u2014 older call sites import `buildOrientation` directly.\n */\nexport const buildOrientation = buildStaticScaffolding;\n\n// ============================================================================\n// Planner-offline gate.\n//\n// The problem we are guarding against: a single transient LLM failure (one bad\n// 429, one malformed JSON response from the classifier) used to poison the\n// gate forever and force every subsequent `start-research` call into the\n// compact stub \u2014 even when env was fine and the next call would have\n// succeeded. That created a deadlock where the very tool that could reset\n// the health flag was the tool being blocked.\n//\n// The safer semantics implemented here:\n// 1. If env is not configured, we are offline. Hard stop.\n// 2. Otherwise, require **two consecutive failures** before gating (one\n// blip is tolerated).\n// 3. Even then, the gate only holds for PLANNER_FAILURE_TTL_MS after the\n// most recent failure. After that window we give the planner another\n// chance regardless of the counter \u2014 if it is still broken the next\n// call's failure will re-arm the gate.\n// 4. Any success resets the counter to 0, so the gate opens immediately\n// on recovery.\n// ============================================================================\n\n/** Minimum consecutive failures before the gate closes. */\nexport const PLANNER_FAILURE_THRESHOLD = 2;\n\n/** How long a recent failure burst keeps the gate closed, in ms. */\nexport const PLANNER_FAILURE_TTL_MS = 60_000;\n\ntype PlannerGateHealth = Pick<\n LLMHealthSnapshot,\n 'plannerConfigured' | 'consecutivePlannerFailures' | 'lastPlannerCheckedAt'\n>;\n\n/**\n * Pure predicate \u2014 returns true when the planner should be treated as\n * offline for the purposes of `start-research`. 
Kept exported and\n * dependency-free so tests can drive it without touching the LLM.\n */\nexport function isPlannerKnownOffline(\n health: PlannerGateHealth,\n nowMs: number = Date.now(),\n): boolean {\n if (!health.plannerConfigured) {\n return true;\n }\n if (health.consecutivePlannerFailures < PLANNER_FAILURE_THRESHOLD) {\n return false;\n }\n if (health.lastPlannerCheckedAt === null) {\n return false;\n }\n const lastMs = Date.parse(health.lastPlannerCheckedAt);\n if (Number.isNaN(lastMs)) {\n return false;\n }\n return nowMs - lastMs < PLANNER_FAILURE_TTL_MS;\n}\n\nasync function buildGoalAwareBrief(\n goal: string,\n signal?: AbortSignal,\n): Promise<string> {\n const processor = createLLMProcessor();\n if (!processor) {\n mcpLog('info', 'start-research: LLM unavailable, returning static orientation only', 'start-research');\n return '';\n }\n\n const brief = await generateResearchBrief(goal, processor, signal);\n if (!brief) {\n mcpLog('warning', 'start-research: brief generation failed, returning static orientation only', 'start-research');\n return '';\n }\n\n return renderResearchBrief(brief);\n}\n\nasync function handleStartResearch(\n params: StartResearchParams,\n signal?: AbortSignal,\n): Promise<ToolExecutionResult<StartResearchOutput>> {\n try {\n const llmHealth = getLLMHealth();\n const plannerKnownOffline = isPlannerKnownOffline(llmHealth);\n\n if (plannerKnownOffline && !params.include_playbook) {\n const stub = buildDegradedStub(params.goal);\n return toolSuccess(stub, { content: stub });\n }\n\n const scaffolding = buildStaticScaffolding(params.goal, {\n plannerAvailable: !plannerKnownOffline,\n });\n\n let brief = '';\n if (params.goal) {\n brief = await buildGoalAwareBrief(params.goal, signal);\n }\n\n const briefFallbackNote = params.goal && !brief\n ? '\\n\\n---\\n\\n> _Goal-tailored brief unavailable: LLM planner is not configured or failed this call. The static playbook above still applies; you can proceed with it, or retry `start-research` after verifying `LLM_API_KEY`._'\n : '';\n\n const content = brief\n ? `${scaffolding}\\n\\n---\\n\\n${brief}`\n : `${scaffolding}${briefFallbackNote}`;\n\n return toolSuccess(content, { content });\n } catch (err: unknown) {\n const structuredError = classifyError(err);\n mcpLog('error', `start-research: ${structuredError.message}`, 'start-research');\n return toolFailure(\n formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'start-research',\n howToFix: ['Retry start-research. If the failure persists, verify LLM_API_KEY / LLM_BASE_URL / LLM_MODEL.'],\n }),\n );\n }\n}\n\nexport function registerStartResearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'start-research',\n title: 'Start Research Session',\n description:\n 'Call this FIRST every research session. Provide a `goal`; I return a goal-tailored brief naming (a) `primary_branch` (reddit for sentiment/migration, web for spec/bug/pricing, both when opinion-heavy AND needs official sources), (b) the exact `first_call_sequence` of web-search + scrape-links calls to fire, (c) 25\u201350 keyword seeds for your first `web-search` call, (d) iteration hints, (e) gaps to watch, (f) stop criteria. No goal? You still get the generic 3-tool playbook. 
Other tools work without calling this, but you will use them worse.',\n schema: startResearchParamsSchema,\n outputSchema: startResearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: false,\n },\n },\n async (args) => toToolResponse(await handleStartResearch(args)),\n );\n}\n"],
- "mappings": "AAEA;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,qBAAqB;AAC9B,SAAS,cAAc;AACvB,SAAS,aAAa,aAAa,sBAAgD;AACnF,SAAS,mBAAmB;AAE5B,MAAM,qBAAqB;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,EAAE,KAAK,IAAI;AASJ,SAAS,uBAAuB,MAAe,OAAuC,CAAC,GAAW;AACvG,QAAM,mBAAmB,KAAK,oBAAoB;AAClD,QAAM,YAAY,OACd,6BAA6B,IAAI,KACjC;AAEJ,QAAM,qBAAqB,mBACvB,8IACA;AAEJ,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAOO,SAAS,kBAAkB,MAAuB;AACvD,QAAM,YAAY,OACd,6BAA6B,IAAI,KACjC;AACJ,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAKO,MAAM,mBAAmB;AAyBzB,MAAM,4BAA4B;AAGlC,MAAM,yBAAyB;AAY/B,SAAS,sBACd,QACA,QAAgB,KAAK,IAAI,GAChB;AACT,MAAI,CAAC,OAAO,mBAAmB;AAC7B,WAAO;AAAA,EACT;AACA,MAAI,OAAO,6BAA6B,2BAA2B;AACjE,WAAO;AAAA,EACT;AACA,MAAI,OAAO,yBAAyB,MAAM;AACxC,WAAO;AAAA,EACT;AACA,QAAM,SAAS,KAAK,MAAM,OAAO,oBAAoB;AACrD,MAAI,OAAO,MAAM,MAAM,GAAG;AACxB,WAAO;AAAA,EACT;AACA,SAAO,QAAQ,SAAS;AAC1B;AAEA,eAAe,oBACb,MACA,QACiB;AACjB,QAAM,YAAY,mBAAmB;AACrC,MAAI,CAAC,WAAW;AACd,WAAO,QAAQ,sEAAsE,gBAAgB;AACrG,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,MAAM,sBAAsB,MAAM,WAAW,MAAM;AACjE,MAAI,CAAC,OAAO;AACV,WAAO,WAAW,8EAA8E,gBAAgB;AAChH,WAAO;AAAA,EACT;AAEA,SAAO,oBAAoB,KAAK;AAClC;AAEA,eAAe,oBACb,QACA,QACmD;AACnD,MAAI;AACF,UAAM,YAAY,aAAa;AAC/B,UAAM,sBAAsB,sBAAsB,SAAS;AAE3D,QAAI,uBAAuB,CAAC,OAAO,kBAAkB;AACnD,YAAM,OAAO,kBAAkB,OAAO,IAAI;AAC1C,aAAO,YAAY,MAAM,EAAE,SAAS,KAAK,CAAC;AAAA,IAC5C;AAEA,UAAM,cAAc,uBAAuB,OAAO,MAAM;AAAA,MACtD,kBAAkB,CAAC;AAAA,IACrB,CAAC;AAED,QAAI,QAAQ;AACZ,QAAI,OAAO,MAAM;AACf,cAAQ,MAAM,oBAAoB,OAAO,MAAM,MAAM;AAAA,IACvD;AAEA,UAAM,oBAAoB,OAAO,QAAQ,CAAC,QACtC,kOACA;AAEJ,UAAM,UAAU,QACZ,GAAG,WAAW;AAAA;AAAA;AAAA;AAAA,EAAc,KAAK,KACjC,GAAG,WAAW,GAAG,iBAAiB;AAEtC,WAAO,YAAY,SAAS,EAAE,QAAQ,CAAC;AAAA,EACzC,SAAS,KAAc;AACrB,UAAM,kBAAkB,cAAc,GAAG;AACzC,WAAO,SAAS,mBAAmB,gBAAgB,OAAO,IAAI,gBAAgB;AAC9E,WAAO;AAAA,MACL,YAAY;AAAA,QACV,MAAM,gBAAgB;AAAA,QACtB,SAAS,gBAAgB;AAAA,QACzB,WAAW,gBAAgB;AAAA,QAC3B,UAAU;AAAA,QACV,UAAU,CAAC,+FAA+F;AAAA,MAC5G,CAAC;AAAA,IACH;AAAA,EACF;AACF;AAEO,SAAS,0BAA0B,QAAyB;AACjE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,SAAS,eAAe,MAAM,oBAAoB,IAAI,CAAC;AAAA,EAChE;AACF;",
+ "sourcesContent": ["import type { MCPServer } from 'mcp-use/server';\n\nimport {\n startResearchOutputSchema,\n startResearchParamsSchema,\n type StartResearchOutput,\n type StartResearchParams,\n} from '../schemas/start-research.js';\nimport {\n createLLMProcessor,\n generateResearchBrief,\n getLLMHealth,\n renderResearchBrief,\n type LLMHealthSnapshot,\n} from '../services/llm-processor.js';\nimport { classifyError } from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\nimport { toolFailure, toolSuccess, toToolResponse, type ToolExecutionResult } from './mcp-helpers.js';\nimport { formatError } from './utils.js';\n\nconst SKILL_INSTALL_HINT = [\n '> \uD83D\uDCA1 **Pair this server with the `run-research` skill** for the full agentic playbook',\n '> (single-agent loop, multi-agent orchestrator, mission-prompt templates, output discipline).',\n '> Install once per machine \u2014 the skill is what teaches the agent how to spend these tools well:',\n '>',\n '> ```bash',\n '> npx -y skills add -y -g yigitkonur/skills-by-yigitkonur/skills/run-research',\n '> ```',\n '>',\n '> Already installed? Skip this \u2014 the skill auto-loads on relevant prompts. The full pack',\n '> ships ~50 sibling skills: `npx -y skills add -y -g yigitkonur/skills-by-yigitkonur`.',\n].join('\\n');\n\n/**\n * Full research-loop playbook. Teaches the 3-tool mental model\n * (start-research, web-search, scrape-links), the aggressive multi-call\n * discipline, parallel-callability, and the cite-from-scrape rule.\n *\n * Emitted when the LLM planner is healthy OR `include_playbook: true`.\n */\nexport function buildStaticScaffolding(goal?: string, opts: { plannerAvailable?: boolean } = {}): string {\n const plannerAvailable = opts.plannerAvailable ?? true;\n const focusLine = goal\n ? `> Focus for this session: ${goal}`\n : '> Focus for this session: not yet specified \u2014 set one on the next pass';\n\n const classifierLoopStep = plannerAvailable\n ? '3. Read the classifier output: `synthesis` (citations in `[rank]`), `gaps[]` (with ids), `refine_queries[]` (follow-ups tied to gap ids).'\n : '3. Classifier output is NOT available (LLM planner offline). `web-search` returns a raw ranked list \u2014 synthesize the terrain yourself from titles + snippets.';\n\n return [\n '# Research session started',\n '',\n SKILL_INSTALL_HINT,\n '',\n focusLine,\n '',\n 'You are running a research LOOP, not answering from memory. Training data is stale; the web is authoritative for anything dated, versioned, priced, or contested. Every non-trivial claim in your final answer must be traceable to a `scrape-links` excerpt you read. Never cite a URL from a `web-search` snippet alone.',\n '',\n '## The 3 tools',\n '',\n '**1. `start-research`** \u2014 you just called me. I plan this session and return the brief below. Call me again only if the goal materially shifts.',\n '',\n '**2. `web-search`** \u2014 fan out Google queries in parallel. One call carries **up to 50 queries** in a flat `queries` array. Call me **aggressively** \u2014 2\u20134 rounds per session is normal, not 1. After each pass, read `gaps[]` and `refine_queries[]` and fire another round with the harvested terms. **Parallel-safe**: run multiple `web-search` calls in the same turn for orthogonal subtopics (e.g. one call for \"spec\" queries, one call for \"sentiment\" queries). `scope` values:',\n '- `\"reddit\"` \u2192 server appends `site:reddit.com` and filters to post permalinks. 
Use for sentiment / migration / lived experience.',\n '- `\"web\"` (default) \u2192 open web. Use for spec / bug / pricing / CVE / API / primary-source hunts.',\n '- `\"both\"` \u2192 fans each query across both. Use when the topic is opinion-heavy AND needs official sources.',\n '',\n '**3. `scrape-links`** \u2014 fetch URLs in parallel and run per-URL LLM extraction. **Auto-detects** `reddit.com/r/.../comments/` permalinks and routes them through the Reddit API (threaded post + comments); everything else flows through the HTTP scraper. Mix Reddit + web URLs freely \u2014 both branches run concurrently. **Parallel-safe**: prefer multiple `scrape-links` calls with contextually grouped URLs over one giant mixed batch. Each page returns `## Source`, `## Matches` (verbatim facts), `## Not found` (explicit gaps this page did NOT answer), `## Follow-up signals` (new terms + referenced-but-unscraped URLs that seed your next `web-search` round). Describe extraction SHAPE in `extract`, facets separated by `|`: `root cause | affected versions | fix | workarounds | timeline`.',\n '',\n '## The loop',\n '',\n '1. Read the brief below (if present). Note `primary_branch`, `keyword_seeds`, `gaps_to_watch`, `stop_criteria`.',\n '2. Fire `first_call_sequence` in order. For `primary_branch: reddit`, lead with `web-search scope:\"reddit\"` \u2192 `scrape-links` on the best post permalinks. For `web`, lead with `web-search scope:\"web\"` \u2192 `scrape-links` on HIGHLY_RELEVANT URLs. For `both`, issue two parallel `web-search` calls (one per scope) in the same turn, then one merged `scrape-links`.',\n classifierLoopStep,\n '4. Scrape every HIGHLY_RELEVANT plus the 2\u20133 best MAYBE_RELEVANT. Group URLs into parallel `scrape-links` calls when contexts differ (e.g. one call for docs, one for reddit threads).',\n '5. Harvest from each scrape extract\\'s `## Follow-up signals` \u2014 new terms, version numbers, vendor names, failure modes, referenced URLs. These seed your next `web-search` round.',\n '6. Fire the next `web-search` round with the harvested terms plus any `refine_queries[]` the classifier suggested. Do NOT paraphrase queries already run \u2014 the classifier tracks them.',\n '7. **Stop** when every `gaps_to_watch` item is closed AND the last `web-search` pass surfaced no new terms, OR when you have completed 4 full passes. State remaining gaps explicitly if you hit the cap.',\n '',\n '## Output discipline',\n '',\n '- Cite URL (or Reddit permalink) for every non-trivial claim \u2014 only from a `scrape-links` excerpt you read.',\n '- Quote verbatim: numbers, versions, API names, prices, error messages, stacktraces, people\\'s words.',\n '- Separate documented facts from inferred conclusions explicitly.',\n '- Include the scrape date for time-sensitive claims.',\n '- If you could not verify something, say so \u2014 do not paper over gaps.',\n '- Never cite a URL from a search snippet alone.',\n '',\n '## Post-cutoff discipline',\n '',\n 'For anything released / changed after your training cutoff \u2014 new products, versions, prices, benchmarks \u2014 treat your own query suggestions as hypotheses until a scraped first-party page confirms them. Include `site:<vendor-domain>` queries in your first `web-search` call when the goal names a vendor or product.',\n ].join('\\n');\n}\n\n/**\n * Compact stub emitted when the LLM planner is offline AND the caller did\n * not opt into the full playbook. 
Names the 3 tools, the loop, parallel-safety,\n * Reddit branch, and cite-from-scrape \u2014 enough to keep an agent moving.\n */\nexport function buildDegradedStub(goal?: string): string {\n const focusLine = goal\n ? `> Focus for this session: ${goal}`\n : '> Focus for this session: not specified \u2014 set one on the next pass.';\n return [\n '# Research session started (LLM planner offline \u2014 compact stub)',\n '',\n SKILL_INSTALL_HINT,\n '',\n focusLine,\n '',\n '**3 tools**: `start-research` (plans), `web-search` (Google fan-out, up to 50 queries/call, `scope: web|reddit|both`), `scrape-links` (fetch URLs in parallel, auto-detects `reddit.com/r/.../comments/` permalinks \u2192 Reddit API; all other URLs \u2192 HTTP scraper). All three are **parallel-callable** \u2014 fire multiple in the same turn when subtopics are orthogonal.',\n '',\n '**Loop**: `web-search` \u2192 `scrape-links` \u2192 read `## Follow-up signals` \u2192 harvest new terms \u2192 next `web-search` round \u2192 stop when gaps close OR after 4 passes. Call `web-search` aggressively (2\u20134 rounds, not 1).',\n '',\n '**Reddit branch**: use `web-search scope:\"reddit\"` for sentiment / migration / lived experience. Skip for CVE / API spec / pricing. Reddit permalinks go straight into `scrape-links` for threaded post + comments.',\n '',\n '**Cite**: every non-trivial claim must trace to a `scrape-links` excerpt, never a search snippet. Quote verbatim for numbers, versions, stacktraces, people\\'s words.',\n '',\n 'Pass `include_playbook: true` to `start-research` for the full tactic reference.',\n ].join('\\n');\n}\n\n/**\n * Backward-compat alias \u2014 older call sites import `buildOrientation` directly.\n */\nexport const buildOrientation = buildStaticScaffolding;\n\n// ============================================================================\n// Planner-offline gate.\n//\n// The problem we are guarding against: a single transient LLM failure (one bad\n// 429, one malformed JSON response from the classifier) used to poison the\n// gate forever and force every subsequent `start-research` call into the\n// compact stub \u2014 even when env was fine and the next call would have\n// succeeded. That created a deadlock where the very tool that could reset\n// the health flag was the tool being blocked.\n//\n// The safer semantics implemented here:\n// 1. If env is not configured, we are offline. Hard stop.\n// 2. Otherwise, require **two consecutive failures** before gating (one\n// blip is tolerated).\n// 3. Even then, the gate only holds for PLANNER_FAILURE_TTL_MS after the\n// most recent failure. After that window we give the planner another\n// chance regardless of the counter \u2014 if it is still broken the next\n// call's failure will re-arm the gate.\n// 4. Any success resets the counter to 0, so the gate opens immediately\n// on recovery.\n// ============================================================================\n\n/** Minimum consecutive failures before the gate closes. */\nexport const PLANNER_FAILURE_THRESHOLD = 2;\n\n/** How long a recent failure burst keeps the gate closed, in ms. */\nexport const PLANNER_FAILURE_TTL_MS = 60_000;\n\ntype PlannerGateHealth = Pick<\n LLMHealthSnapshot,\n 'plannerConfigured' | 'consecutivePlannerFailures' | 'lastPlannerCheckedAt'\n>;\n\n/**\n * Pure predicate \u2014 returns true when the planner should be treated as\n * offline for the purposes of `start-research`. 
Kept exported and\n * dependency-free so tests can drive it without touching the LLM.\n */\nexport function isPlannerKnownOffline(\n health: PlannerGateHealth,\n nowMs: number = Date.now(),\n): boolean {\n if (!health.plannerConfigured) {\n return true;\n }\n if (health.consecutivePlannerFailures < PLANNER_FAILURE_THRESHOLD) {\n return false;\n }\n if (health.lastPlannerCheckedAt === null) {\n return false;\n }\n const lastMs = Date.parse(health.lastPlannerCheckedAt);\n if (Number.isNaN(lastMs)) {\n return false;\n }\n return nowMs - lastMs < PLANNER_FAILURE_TTL_MS;\n}\n\nasync function buildGoalAwareBrief(\n goal: string,\n signal?: AbortSignal,\n): Promise<string> {\n const processor = createLLMProcessor();\n if (!processor) {\n mcpLog('info', 'start-research: LLM unavailable, returning static orientation only', 'start-research');\n return '';\n }\n\n const brief = await generateResearchBrief(goal, processor, signal);\n if (!brief) {\n mcpLog('warning', 'start-research: brief generation failed, returning static orientation only', 'start-research');\n return '';\n }\n\n return renderResearchBrief(brief);\n}\n\nasync function handleStartResearch(\n params: StartResearchParams,\n signal?: AbortSignal,\n): Promise<ToolExecutionResult<StartResearchOutput>> {\n try {\n const llmHealth = getLLMHealth();\n const plannerKnownOffline = isPlannerKnownOffline(llmHealth);\n\n if (plannerKnownOffline && !params.include_playbook) {\n const stub = buildDegradedStub(params.goal);\n return toolSuccess(stub);\n }\n\n const scaffolding = buildStaticScaffolding(params.goal, {\n plannerAvailable: !plannerKnownOffline,\n });\n\n let brief = '';\n if (params.goal) {\n brief = await buildGoalAwareBrief(params.goal, signal);\n }\n\n const briefFallbackNote = params.goal && !brief\n ? '\\n\\n---\\n\\n> _Goal-tailored brief unavailable: LLM planner is not configured or failed this call. The static playbook above still applies; you can proceed with it, or retry `start-research` after verifying `LLM_API_KEY`._'\n : '';\n\n const content = brief\n ? `${scaffolding}\\n\\n---\\n\\n${brief}`\n : `${scaffolding}${briefFallbackNote}`;\n\n return toolSuccess(content);\n } catch (err: unknown) {\n const structuredError = classifyError(err);\n mcpLog('error', `start-research: ${structuredError.message}`, 'start-research');\n return toolFailure(\n formatError({\n code: structuredError.code,\n message: structuredError.message,\n retryable: structuredError.retryable,\n toolName: 'start-research',\n howToFix: ['Retry start-research. If the failure persists, verify LLM_API_KEY / LLM_BASE_URL / LLM_MODEL.'],\n }),\n );\n }\n}\n\nexport function registerStartResearchTool(server: MCPServer): void {\n server.tool(\n {\n name: 'start-research',\n title: 'Start Research Session',\n description:\n 'Call this FIRST every research session. Provide a `goal`; I return a goal-tailored brief naming (a) `primary_branch` (reddit for sentiment/migration, web for spec/bug/pricing, both when opinion-heavy AND needs official sources), (b) the exact `first_call_sequence` of web-search + scrape-links calls to fire, (c) 25\u201350 keyword seeds for your first `web-search` call, (d) iteration hints, (e) gaps to watch, (f) stop criteria. No goal? You still get the generic 3-tool playbook. 
Other tools work without calling this, but you will use them worse.',\n schema: startResearchParamsSchema,\n outputSchema: startResearchOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: false,\n },\n },\n async (args) => toToolResponse(await handleStartResearch(args)),\n );\n}\n"],
+ "mappings": "AAEA;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,qBAAqB;AAC9B,SAAS,cAAc;AACvB,SAAS,aAAa,aAAa,sBAAgD;AACnF,SAAS,mBAAmB;AAE5B,MAAM,qBAAqB;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,EAAE,KAAK,IAAI;AASJ,SAAS,uBAAuB,MAAe,OAAuC,CAAC,GAAW;AACvG,QAAM,mBAAmB,KAAK,oBAAoB;AAClD,QAAM,YAAY,OACd,6BAA6B,IAAI,KACjC;AAEJ,QAAM,qBAAqB,mBACvB,8IACA;AAEJ,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAOO,SAAS,kBAAkB,MAAuB;AACvD,QAAM,YAAY,OACd,6BAA6B,IAAI,KACjC;AACJ,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAKO,MAAM,mBAAmB;AAyBzB,MAAM,4BAA4B;AAGlC,MAAM,yBAAyB;AAY/B,SAAS,sBACd,QACA,QAAgB,KAAK,IAAI,GAChB;AACT,MAAI,CAAC,OAAO,mBAAmB;AAC7B,WAAO;AAAA,EACT;AACA,MAAI,OAAO,6BAA6B,2BAA2B;AACjE,WAAO;AAAA,EACT;AACA,MAAI,OAAO,yBAAyB,MAAM;AACxC,WAAO;AAAA,EACT;AACA,QAAM,SAAS,KAAK,MAAM,OAAO,oBAAoB;AACrD,MAAI,OAAO,MAAM,MAAM,GAAG;AACxB,WAAO;AAAA,EACT;AACA,SAAO,QAAQ,SAAS;AAC1B;AAEA,eAAe,oBACb,MACA,QACiB;AACjB,QAAM,YAAY,mBAAmB;AACrC,MAAI,CAAC,WAAW;AACd,WAAO,QAAQ,sEAAsE,gBAAgB;AACrG,WAAO;AAAA,EACT;AAEA,QAAM,QAAQ,MAAM,sBAAsB,MAAM,WAAW,MAAM;AACjE,MAAI,CAAC,OAAO;AACV,WAAO,WAAW,8EAA8E,gBAAgB;AAChH,WAAO;AAAA,EACT;AAEA,SAAO,oBAAoB,KAAK;AAClC;AAEA,eAAe,oBACb,QACA,QACmD;AACnD,MAAI;AACF,UAAM,YAAY,aAAa;AAC/B,UAAM,sBAAsB,sBAAsB,SAAS;AAE3D,QAAI,uBAAuB,CAAC,OAAO,kBAAkB;AACnD,YAAM,OAAO,kBAAkB,OAAO,IAAI;AAC1C,aAAO,YAAY,IAAI;AAAA,IACzB;AAEA,UAAM,cAAc,uBAAuB,OAAO,MAAM;AAAA,MACtD,kBAAkB,CAAC;AAAA,IACrB,CAAC;AAED,QAAI,QAAQ;AACZ,QAAI,OAAO,MAAM;AACf,cAAQ,MAAM,oBAAoB,OAAO,MAAM,MAAM;AAAA,IACvD;AAEA,UAAM,oBAAoB,OAAO,QAAQ,CAAC,QACtC,kOACA;AAEJ,UAAM,UAAU,QACZ,GAAG,WAAW;AAAA;AAAA;AAAA;AAAA,EAAc,KAAK,KACjC,GAAG,WAAW,GAAG,iBAAiB;AAEtC,WAAO,YAAY,OAAO;AAAA,EAC5B,SAAS,KAAc;AACrB,UAAM,kBAAkB,cAAc,GAAG;AACzC,WAAO,SAAS,mBAAmB,gBAAgB,OAAO,IAAI,gBAAgB;AAC9E,WAAO;AAAA,MACL,YAAY;AAAA,QACV,MAAM,gBAAgB;AAAA,QACtB,SAAS,gBAAgB;AAAA,QACzB,WAAW,gBAAgB;AAAA,QAC3B,UAAU;AAAA,QACV,UAAU,CAAC,+FAA+F;AAAA,MAC5G,CAAC;AAAA,IACH;AAAA,EACF;AACF;AAEO,SAAS,0BAA0B,QAAyB;AACjE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,SAAS,eAAe,MAAM,oBAAoB,IAAI,CAAC;AAAA,EAChE;AACF;",
  "names": []
  }
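
The substantive logic in this release is the planner-offline gate documented in the `sourcesContent` above. `isPlannerKnownOffline` is deliberately exported and dependency-free so it can be exercised without touching an LLM; the constants and the predicate below are real exports of `src/tools/start-research.ts`, while the synthetic health snapshots and the import path are illustrative.

```ts
import {
  isPlannerKnownOffline,
  PLANNER_FAILURE_TTL_MS,
} from './src/tools/start-research.js'; // import path illustrative

const now = Date.now();
const iso = (msAgo: number) => new Date(now - msAgo).toISOString();

// 1. Env not configured: offline, unconditionally.
isPlannerKnownOffline(
  { plannerConfigured: false, consecutivePlannerFailures: 0, lastPlannerCheckedAt: null },
  now,
); // => true

// 2. A single blip stays below PLANNER_FAILURE_THRESHOLD (2): gate stays open.
isPlannerKnownOffline(
  { plannerConfigured: true, consecutivePlannerFailures: 1, lastPlannerCheckedAt: iso(1_000) },
  now,
); // => false

// 3. Two consecutive recent failures: gate closes.
isPlannerKnownOffline(
  { plannerConfigured: true, consecutivePlannerFailures: 2, lastPlannerCheckedAt: iso(5_000) },
  now,
); // => true

// 4. Same counter, but the last failure is older than the 60s TTL:
//    the planner gets another chance regardless of the counter.
isPlannerKnownOffline(
  { plannerConfigured: true, consecutivePlannerFailures: 2, lastPlannerCheckedAt: iso(PLANNER_FAILURE_TTL_MS + 1) },
  now,
); // => false
```

Case 4 is the anti-deadlock property the file comment calls out: a stale failure burst can no longer permanently force `start-research` into the compact stub, and any success resets the counter so the gate opens immediately on recovery.
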
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "mcp-researchpowerpack",
- "version": "6.0.1",
+ "version": "6.0.3",
  "description": "HTTP-first MCP research server: start-research (goal-tailored brief), web-search (with Reddit scope), scrape-links (auto-detects Reddit URLs) — built on mcp-use.",
  "type": "module",
  "main": "dist/index.js",