mcp-researchpowerpack 6.0.16 → 6.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +19 -14
  2. package/dist/index.js +1 -1
  3. package/dist/index.js.map +1 -1
  4. package/dist/mcp-use.json +2 -2
  5. package/dist/src/clients/jina.js +262 -80
  6. package/dist/src/clients/jina.js.map +2 -2
  7. package/dist/src/clients/kernel.js +142 -0
  8. package/dist/src/clients/kernel.js.map +7 -0
  9. package/dist/src/clients/reddit.js.map +1 -1
  10. package/dist/src/config/index.js +27 -8
  11. package/dist/src/config/index.js.map +2 -2
  12. package/dist/src/effect/errors.js +58 -0
  13. package/dist/src/effect/errors.js.map +7 -0
  14. package/dist/src/effect/runtime.js +15 -0
  15. package/dist/src/effect/runtime.js.map +7 -0
  16. package/dist/src/effect/services.js +97 -0
  17. package/dist/src/effect/services.js.map +7 -0
  18. package/dist/src/schemas/scrape-links.js +14 -20
  19. package/dist/src/schemas/scrape-links.js.map +2 -2
  20. package/dist/src/schemas/start-research.js +2 -2
  21. package/dist/src/schemas/start-research.js.map +1 -1
  22. package/dist/src/schemas/web-search.js +17 -63
  23. package/dist/src/schemas/web-search.js.map +2 -2
  24. package/dist/src/services/llm-processor.js +23 -14
  25. package/dist/src/services/llm-processor.js.map +2 -2
  26. package/dist/src/tools/registry.js +4 -4
  27. package/dist/src/tools/registry.js.map +2 -2
  28. package/dist/src/tools/scrape.js +415 -224
  29. package/dist/src/tools/scrape.js.map +3 -3
  30. package/dist/src/tools/search.js +197 -155
  31. package/dist/src/tools/search.js.map +3 -3
  32. package/dist/src/tools/start-research.js +38 -19
  33. package/dist/src/tools/start-research.js.map +2 -2
  34. package/dist/src/utils/content-quality.js +64 -0
  35. package/dist/src/utils/content-quality.js.map +7 -0
  36. package/dist/src/utils/query-relax.js.map +1 -1
  37. package/dist/src/utils/source-type.js.map +1 -1
  38. package/package.json +4 -2
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # mcp-researchpowerpack
4
4
 
5
- HTTP MCP server for research. Three tools, orientation-first, built for agents that run multi-pass research loops.
5
+ HTTP MCP server for research. Five tools, orientation-first, built for agents that run multi-pass research loops.
6
6
 
7
7
  Built on [mcp-use](https://github.com/nicepkg/mcp-use). No stdio, HTTP only.
8
8
 
@@ -11,8 +11,10 @@ Built on [mcp-use](https://github.com/nicepkg/mcp-use). No stdio, HTTP only.
11
11
  | tool | what it does | needs |
12
12
  |------|-------------|-------|
13
13
  | `start-research` | returns a goal-tailored brief: `primary_branch` (reddit / web / both), exact `first_call_sequence`, 25–50 keyword seeds, iteration hints, gaps to watch, stop criteria. Call FIRST every session. | `LLM_API_KEY` + `LLM_BASE_URL` + `LLM_MODEL` for non-degraded brief generation (optional) |
14
- | `web-search` | parallel Google search, up to 50 queries per call, parallel-callable across turns. `scope: "web" \| "reddit" \| "both"` reddit mode filters to post permalinks. Queries should be retrieval probes, not topic labels: rewrite vague phrases into source-aware searches with anchors such as `site:`, exact quoted terms, versions, error text, package names, or community filters. Returns tiered markdown (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis + gaps + refine suggestions. | `SERPER_API_KEY` |
15
- | `scrape-links` | fetch URLs in parallel with per-URL LLM extraction. Auto-detects `reddit.com/r/.../comments/` permalinks and routes them through the Reddit API (threaded post + comments); PDF / DOCX / PPTX / XLSX URLs route through Jina Reader; non-reddit, non-document web URLs flow through Scrape.do. Parallel-callable. | `SCRAPEDO_API_KEY` for web URLs (+ `REDDIT_CLIENT_ID` / `REDDIT_CLIENT_SECRET` for reddit URLs; optional `JINA_API_KEY` for higher document limits) |
14
+ | `raw-web-search` | parallel search, up to 50 `keywords` per call. Serper is primary; Jina Search is fallback when Serper is missing, fails, or yields empty query results. Returns the raw ranked markdown list directly. Use for broad discovery, audit trails, and Reddit permalink discovery via explicit `site:reddit.com/r/.../comments` probes. | `SERPER_API_KEY` or `JINA_API_KEY` |
15
+ | `smart-web-search` | parallel search, up to 50 `keywords` per call, plus required `extract`. Serper/Jina provider order matches raw search. Always runs LLM classification and returns tiered markdown (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis + gaps + refine suggestions. Supports `scope: "web" \| "reddit" \| "both"`. | `SERPER_API_KEY` or `JINA_API_KEY` + LLM env |
16
+ | `raw-scrape-links` | fetch URLs in parallel and return full markdown directly. Reddit post permalinks route through the Reddit API with threaded comments. Non-Reddit URLs use Jina Reader first, then Jina Reader through Scrape.do proxy mode, then optional Kernel browser rendering for web pages. | optional `REDDIT_CLIENT_ID` / `REDDIT_CLIENT_SECRET`, `SCRAPEDO_API_KEY`, `JINA_API_KEY`, `KERNEL_API_KEY` |
17
+ | `smart-scrape-links` | same fetch stack as raw scrape, then always runs per-URL LLM extraction with required `extract`. Use for focused evidence packs with `## Source`, `## Matches`, `## Not found`, and `## Follow-up signals`. | raw scrape providers + LLM env |
16
18
 
17
19
  Also exposes `/health` and `health://status`.
18
20
 
@@ -20,7 +22,7 @@ Also exposes `/health` and `health://status`.
20
22
 
21
23
  Call `start-research` once at the beginning of each session with your goal. The server returns a brief that tells the agent exactly which tool to call first (reddit-first for sentiment/migration, web-first for spec/bug/pricing, both when opinion-heavy AND needs official sources), what keyword seeds to fire, and when to stop.
22
24
 
23
- For search fan-out, use bad → better rewrite thinking before calling `web-search`: turn broad phrases like `<feature> support`, `<product> pricing`, `<library> bug fix`, or `<tool> reviews` into source-aware probes such as `site:<official-docs-domain> "<feature>" "<platform-or-version>"`, `site:<vendor-domain> "<product>" pricing "enterprise" OR "free tier"`, `"<exact error text>" "<library-or-package>" "<version>" site:github.com`, or `site:reddit.com/r/<community>/comments "<tool>" "migration" OR "regression"`.
25
+ For search fan-out, use bad → better rewrite thinking before calling `raw-web-search` or `smart-web-search`: turn broad phrases like `<feature> support`, `<product> pricing`, `<library> bug fix`, or `<tool> reviews` into source-aware probes such as `site:<official-docs-domain> "<feature>" "<platform-or-version>"`, `site:<vendor-domain> "<product>" pricing "enterprise" OR "free tier"`, `"<exact error text>" "<library-or-package>" "<version>" site:github.com`, or `site:reddit.com/r/<community>/comments "<tool>" "migration" OR "regression"`.
24
26
 
25
27
  Pair the server with the [`run-research`](https://github.com/yigitkonur/skills-by-yigitkonur/tree/main/skills/run-research) skill for the full agentic playbook:
26
28
 
@@ -69,11 +71,13 @@ Copy `.env.example`, set only what you need. Missing keys don't crash the server
69
71
 
70
72
  | var | enables |
71
73
  |-----|---------|
72
- | `SERPER_API_KEY` | `web-search` (all scopes) |
73
- | `SCRAPEDO_API_KEY` | `scrape-links` for non-reddit, non-document web URLs |
74
- | `REDDIT_CLIENT_ID` + `REDDIT_CLIENT_SECRET` | `scrape-links` for reddit.com permalinks (threaded post + comments) |
75
- | `JINA_API_KEY` | optional higher-rate `scrape-links` document conversion for PDF / DOCX / PPTX / XLSX URLs via Jina Reader |
76
- | `LLM_API_KEY` + `LLM_BASE_URL` + `LLM_MODEL` | goal-tailored brief, AI extraction, search classification, raw-mode refine suggestions |
74
+ | `SERPER_API_KEY` | primary raw/smart web search provider |
75
+ | `SCRAPEDO_API_KEY` | Scrape.do proxy-mode retry for Jina Reader (`X-Proxy-Url`) |
76
+ | `REDDIT_CLIENT_ID` + `REDDIT_CLIENT_SECRET` | raw/smart scrape for reddit.com permalinks (threaded post + comments) |
77
+ | `JINA_API_KEY` | Jina Search fallback and authenticated Jina Reader requests |
78
+ | `KERNEL_API_KEY` | optional Kernel browser-render fallback after Jina direct + proxy fail |
79
+ | `KERNEL_PROJECT` | optional Kernel project scoping header |
80
+ | `LLM_API_KEY` + `LLM_BASE_URL` + `LLM_MODEL` | goal-tailored brief, `smart-web-search`, `smart-scrape-links` |
77
81
 
78
82
  ### llm (AI extraction + classification)
79
83
 
@@ -133,11 +137,12 @@ src/
133
137
  config/ env parsing, capability detection, lazy proxy config
134
138
  clients/ provider API clients (serper, reddit, scrapedo, jina, kernel)
135
139
  tools/
136
- registry.ts registerAllTools() — wires 3 tools
140
+ registry.ts registerAllTools() — wires 5 tools
137
141
  start-research.ts goal-tailored brief + static playbook
138
- search.ts web-search handler (with CTR-weighted URL aggregation + LLM classification)
139
- scrape.ts scrape-links handler (reddit + web + document branches in parallel)
140
- mcp-helpers.ts response builders (markdown + structured MCP output)
142
+ search.ts raw/smart search handlers (CTR ranking + optional LLM classification)
143
+ scrape.ts raw/smart scrape handlers (Reddit API, Jina Reader, Scrape.do proxy,
144
+ optional Kernel, optional LLM extraction)
145
+ mcp-helpers.ts markdown response builders
141
146
  utils.ts shared formatters
142
147
  services/
143
148
  llm-processor.ts AI extraction, classification, brief generation — primary + fallback model, always low reasoning
@@ -153,7 +158,7 @@ src/
153
158
  logger.ts mcpLog() — stderr-only (MCP-safe)
154
159
  ```
155
160
 
156
- Key patterns: capability detection at startup, description-led tool routing (no bootstrap gate), always-on structured MCP tool output, tiered classified output in `web-search`, parallel reddit + web + document branches in `scrape-links`, Jina fallback for binary/document content, bounded concurrency via `p-map`, CTR-based URL ranking, tools never throw (always return `toolFailure`), and structured errors with retry classification.
161
+ Key patterns: capability detection at startup, description-led tool routing (no bootstrap gate), markdown-only MCP tool output, raw/smart tool split, tiered classified output in `smart-web-search`, Reddit API routing in scrape tools, Jina Reader first for non-Reddit URLs, Scrape.do proxy-mode retry through `X-Proxy-Url`, optional Kernel browser-render fallback, bounded concurrency via `p-map`, CTR-based URL ranking, tools never throw (always return `toolFailure`), and structured errors with retry classification.
157
162
 
158
163
  ## license
159
164
 
package/dist/index.js CHANGED
@@ -14,7 +14,7 @@ import { getLLMHealth } from "./src/services/llm-processor.js";
14
14
  import { registerAllTools } from "./src/tools/registry.js";
15
15
  const DEFAULT_PORT = 3e3;
16
16
  const SHUTDOWN_TIMEOUT_MS = 1e4;
17
- const WEBSITE_URL = "https://github.com/yigitkonur/mcp-researchpowerpack-http";
17
+ const WEBSITE_URL = "https://github.com/yigitkonur/mcp-researchpowerpack";
18
18
  const LOCAL_DEFAULT_HOST = "127.0.0.1";
19
19
  const startupLogger = Logger.get("startup");
20
20
  function parseCsvEnv(value) {
package/dist/index.js.map CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../index.ts"],
4
- "sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack-http' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? 
'', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n // Distinguish \"never probed\" (checkedAt === null) from \"probed and failed\"\n // (checkedAt set, ok=false). The raw `lastPlannerOk` defaults to `false`\n // at startup, which would mislead operators into thinking the LLM is\n // broken before it has been exercised once.\n const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;\n const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? 
null : llm.lastExtractorOk;\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: plannerOkForHealth,\n llm_extractor_ok: extractorOkForHealth,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n // Counter surfacing lets operators diagnose gate behavior from outside\n // the process (see src/tools/start-research.ts for the gate semantics).\n consecutive_planner_failures: llm.consecutivePlannerFailures,\n consecutive_extractor_failures: llm.consecutiveExtractorFailures,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. 
' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model: process.env.LLM_MODEL ?? null,\n extractor_model: process.env.LLM_MODEL ?? 
null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? `http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n 
clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
4
+ "sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? 
'', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n // Distinguish \"never probed\" (checkedAt === null) from \"probed and failed\"\n // (checkedAt set, ok=false). The raw `lastPlannerOk` defaults to `false`\n // at startup, which would mislead operators into thinking the LLM is\n // broken before it has been exercised once.\n const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;\n const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? 
null : llm.lastExtractorOk;\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: plannerOkForHealth,\n llm_extractor_ok: extractorOkForHealth,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n // Counter surfacing lets operators diagnose gate behavior from outside\n // the process (see src/tools/start-research.ts for the gate semantics).\n consecutive_planner_failures: llm.consecutivePlannerFailures,\n consecutive_extractor_failures: llm.consecutiveExtractorFailures,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. 
' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model: process.env.LLM_MODEL ?? null,\n extractor_model: process.env.LLM_MODEL ?? 
null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? `http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n 
clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
5
5
  "mappings": ";AAGA,IAAI,CAAC,QAAQ,IAAI,oBAAoB;AACnC,UAAQ,IAAI,qBAAqB;AACnC;AAEA,SAAS,cAAc;AACvB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AAEP,SAAS,cAAc;AACvB,SAAS,oBAAoB;AAC7B,SAAS,wBAAwB;AAEjC,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAC5B,MAAM,cAAc;AACpB,MAAM,qBAAqB;AAI3B,MAAM,gBAAgB,OAAO,IAAI,SAAS;AAE1C,SAAS,YAAY,OAAiD;AACpE,MAAI,CAAC,MAAO,QAAO;AAEnB,QAAM,QAAQ,MACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AAEjB,SAAO,MAAM,SAAS,IAAI,QAAQ;AACpC;AAEA,SAAS,UAAU,OAA2B,UAA0B;AACtE,QAAM,SAAS,OAAO,SAAS,SAAS,IAAI,EAAE;AAC9C,MAAI,OAAO,SAAS,MAAM,KAAK,SAAS,GAAG;AACzC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,cAAsB;AAC7B,QAAM,gBAAgB,QAAQ,KAAK,UAAU,CAAC,QAAQ,QAAQ,QAAQ;AACtE,MAAI,iBAAiB,GAAG;AACtB,WAAO,UAAU,QAAQ,KAAK,gBAAgB,CAAC,GAAG,YAAY;AAAA,EAChE;AAEA,SAAO,UAAU,QAAQ,IAAI,MAAM,YAAY;AACjD;AAEA,SAAS,cAAsB;AAC7B,QAAM,eAAe,QAAQ,IAAI,MAAM,KAAK;AAC5C,MAAI,cAAc;AAChB,WAAO;AAAA,EACT;AAGA,MAAI,QAAQ,IAAI,MAAM,KAAK,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,UAAU,gBAA4D;AAC7E,MAAI,CAAC,kBAAkB,eAAe,WAAW,GAAG;AAClD,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,cAAc,CAAC,OAAO,QAAQ,QAAQ,OAAO,UAAU,SAAS;AAAA,IAChE,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,eAAe,CAAC,gBAAgB;AAAA,EAClC;AACF;AAEA,SAAS,mBAAyB;AAChC,SAAO,UAAU;AAAA,IACf,OAAO,QAAQ,IAAI,aAAa,eAAe,SAAS;AAAA,IACxD,QAAQ;AAAA,EACV,CAAC;AAED,QAAM,QAAQ,QAAQ,IAAI,OAAO,KAAK;AACtC,MAAI,UAAU,KAAK;AACjB,WAAO,SAAS,CAAC;AAAA,EACnB,WAAW,OAAO;AAChB,WAAO,SAAS,CAAC;AAAA,EACnB;AACF;AAEA,SAAS,gBAAgB,OAAe,SAAyB;AAC/D,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,EAAE;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,GAAG,OAAO,wDAAwD,KAAK,EAAE;AAAA,EAC3F;AACF;AAEA,SAAS,wBAA8C;AACrD,QAAM,kBAAkB,YAAY,QAAQ,IAAI,eAAe;AAC/D,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,WAAO,gBAAgB,IAAI,YAAU,gBAAgB,QAAQ,iBAAiB,CAAC;AAAA,EACjF;AAEA,SAAO;AACT;AAEA,SAAS,qBAGP;AACA,SAAO;AAAA,IACL,eAAe;AAAA,MACb,cAAc,IAAI,qBAAqB;AAAA,MACvC,eAAe,IAAI,sBAAsB;AAAA,IAC3C;AAAA,IACA,YAAY,CAAC;AAAA,EACf;AACF;AAEA,SAAS,mBAAmB,QAAmB,WAAmB;AAChE,QAAM,MAAM
,aAAa;AAKzB,QAAM,qBAAqB,IAAI,yBAAyB,OAAO,OAAO,IAAI;AAC1E,QAAM,uBAAuB,IAAI,2BAA2B,OAAO,OAAO,IAAI;AAC9E,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,MAAM,OAAO;AAAA,IACb,SAAS,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,gBAAgB,KAAK,OAAO,KAAK,IAAI,IAAI,aAAa,GAAI;AAAA,IAC1D,iBAAiB,OAAO,kBAAkB,EAAE;AAAA,IAC5C,gBAAgB;AAAA,IAChB,kBAAkB;AAAA,IAClB,wBAAwB,IAAI;AAAA,IAC5B,0BAA0B,IAAI;AAAA,IAC9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,oBAAoB,IAAI;AAAA,IACxB,sBAAsB,IAAI;AAAA;AAAA;AAAA,IAG1B,8BAA8B,IAAI;AAAA,IAClC,gCAAgC,IAAI;AAAA,IACpC,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,EACpC;AACF;AAEA,eAAe,OAAsB;AACnC,mBAAiB;AAEjB,QAAM,eAAe,QAAQ,IAAI,aAAa;AAC9C,QAAM,OAAO,YAAY;AACzB,QAAM,OAAO,YAAY;AACzB,QAAM,UAAU,QAAQ,IAAI,SAAS,KAAK,KAAK;AAC/C,QAAM,iBAAiB,sBAAsB;AAE7C,QAAM,EAAE,eAAe,WAAW,IAAI,mBAAmB;AAEzD,gBAAc,KAAK,YAAY,OAAO,IAAI,KAAK,OAAO,OAAO,EAAE;AAC/D,gBAAc,KAAK,0BAA0B,IAAI,IAAI,IAAI,EAAE;AAC3D,MAAI,kBAAkB,eAAe,SAAS,GAAG;AAC/C,kBAAc,KAAK,wCAAwC,eAAe,KAAK,IAAI,CAAC,EAAE;AAAA,EACxF,WAAW,cAAc;AACvB,QAAI,CAAC,SAAS;AACZ,oBAAc;AAAA,QACZ;AAAA,MAGF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,kBAAc;AAAA,MACZ;AAAA,IAEF;AAAA,EACF,OAAO;AACL,kBAAc,KAAK,gDAAgD;AAAA,EACrE;AAEA,QAAM,SAAS,IAAI,UAAU;AAAA,IAC3B,MAAM,OAAO;AAAA,IACb,OAAO;AAAA,IACP,SAAS,OAAO;AAAA,IAChB,aAAa,OAAO;AAAA,IACpB,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA,MAAM,UAAU,cAAc;AAAA,IAC9B;AAAA,IACA,GAAG;AAAA,EACL,CAAC;AAED,mBAAiB,MAAM;AASvB,MAAI;AAGF,UAAM,UAAU;AAChB,UAAM,WAAW,QAAQ,qBAAqB,KAAK,MAAM;AACzD,QAAI,UAAU;AACZ,cAAQ,sBAAsB,CAAC,cAA+B;AAC5D,cAAM,SAAS,SAAS,SAAS;AACjC,YAAI;AACF,gBAAM,MAAM,aAAa;AACzB,iBAAO,QAAQ,uBAAuB;AAAA,YACpC,cAAc;AAAA,cACZ,oBAAoB;AAAA,gBAClB,mBAAmB,IAAI;AAAA,gBACvB,qBAAqB,IAAI;AAAA,gBACzB,eAAe,QAAQ,IAAI,aAAa;AAAA,gBACxC,iBAAiB,QAAQ,IAAI,aAAa;AAAA,cAC5C;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AACA,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF,SAAS,KAAK;AACZ,kBAAc,KAAK,2CAA2C,OAAO,GAAG,CAAC,EAAE;AAAA,EAC7E;AAEA,QAAM,YAAY,KAAK,IAAI;AAE3B,SAAO,IAAI,WAAW,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AAC1E,SAAO,IAAI,YAAY,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;
AAQ3E,QAAM,kBAAkB,WAAW,UAAU,IAAI,IAAI,IAAI;AACzD,SAAO;AAAA,IAAI;AAAA,IAAyC,CAAC,MACnD,EAAE,KAAK,EAAE,UAAU,gBAAgB,CAAC;AAAA,EACtC;AACA,SAAO;AAAA,IAAI;AAAA,IAA6C,CAAC,MACvD,EAAE,KAAK,EAAE,UAAU,GAAG,eAAe,OAAO,CAAC;AAAA,EAC/C;AAEA,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,KAAK;AAAA,MACL,aAAa;AAAA,MACb,UAAU;AAAA,IACZ;AAAA,IACA,YAAY,OAAO,mBAAmB,QAAQ,SAAS,CAAC;AAAA,EAC1D;AAEA,MAAI,iBAAiB;AAErB,iBAAe,SAAS,QAAgB,UAAiC;AACvE,QAAI,eAAgB;AACpB,qBAAiB;AAEjB,UAAM,YAAY,WAAW,MAAM;AACjC,oBAAc,MAAM,qBAAqB,mBAAmB,OAAO,MAAM,GAAG;AAC5E,cAAQ,KAAK,CAAC;AAAA,IAChB,GAAG,mBAAmB;AAEtB,QAAI;AACF,oBAAc,KAAK,6BAA6B,MAAM,EAAE;AACxD,YAAM,OAAO,MAAM;AAEnB,iBAAW,aAAa,YAAY;AAClC,cAAM,UAAU;AAAA,MAClB;AAEA,mBAAa,SAAS;AACtB,cAAQ,KAAK,QAAQ;AAAA,IACvB,SAAS,OAAO;AACd,mBAAa,SAAS;AACtB,YAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,oBAAc,MAAM,gCAAgC,OAAO,EAAE;AAC7D,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF;AAEA,UAAQ,GAAG,WAAW,MAAM;AAC1B,SAAK,SAAS,WAAW,CAAC;AAAA,EAC5B,CAAC;AAED,UAAQ,GAAG,UAAU,MAAM;AACzB,SAAK,SAAS,UAAU,CAAC;AAAA,EAC3B,CAAC;AAED,UAAQ,GAAG,qBAAqB,CAAC,UAAU;AACzC,kBAAc,MAAM,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AACzE,SAAK,SAAS,qBAAqB,CAAC;AAAA,EACtC,CAAC;AAED,UAAQ,GAAG,sBAAsB,CAAC,WAAW;AAC3C,kBAAc,MAAM,wBAAwB,OAAO,MAAM,CAAC,EAAE;AAC5D,SAAK,SAAS,sBAAsB,CAAC;AAAA,EACvC,CAAC;AAED,QAAM,OAAO,OAAO,IAAI;AAExB,gBAAc,KAAK,GAAG,OAAO,IAAI,KAAK,OAAO,OAAO,wBAAwB,IAAI,IAAI,IAAI,MAAM;AAChG;AAEA,KAAK,KAAK,EAAE,MAAM,CAAC,UAAU;AAC3B,QAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,gBAAc,MAAM,2BAA2B,OAAO,EAAE;AACxD,UAAQ,KAAK,CAAC;AAChB,CAAC;",
6
6
  "names": []
7
7
  }
package/dist/mcp-use.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "includeInspector": false,
3
- "buildTime": "2026-04-30T20:36:35.153Z",
4
- "buildId": "7cde881e8eedd617",
3
+ "buildTime": "2026-05-05T10:02:24.122Z",
4
+ "buildId": "94ba0cb513ead054",
5
5
  "entryPoint": "dist/index.js",
6
6
  "widgets": {}
7
7
  }
@@ -7,8 +7,20 @@ import {
7
7
  import { calculateBackoff } from "../utils/retry.js";
8
8
  import { mcpLog } from "../utils/logger.js";
9
9
  const JINA_READER_BASE = "https://r.jina.ai/";
10
- const DEFAULT_TIMEOUT_MS = 6e4;
10
+ const JINA_SEARCH_BASE = "https://s.jina.ai/";
11
+ const DEFAULT_TIMEOUT_SECONDS = 15;
12
+ const DEFAULT_TIMEOUT_MS = DEFAULT_TIMEOUT_SECONDS * 1e3;
11
13
  const MAX_RETRIES = 2;
14
+ const SEARCH_RESULTS_PER_QUERY = 10;
15
+ function buildJinaSearchUrl(query) {
16
+ const params = new URLSearchParams({ q: query });
17
+ return `${JINA_SEARCH_BASE}?${params.toString()}`;
18
+ }
19
+ function buildScrapeDoProxyUrl(token, parameters = "render=false") {
20
+ const trimmed = token.trim();
21
+ if (!trimmed) return "";
22
+ return `http://${encodeURIComponent(trimmed)}:${parameters}@proxy.scrape.do:8080`;
23
+ }
12
24
  class JinaClient {
13
25
  apiKey;
14
26
  constructor(apiKey) {
@@ -20,7 +32,13 @@ class JinaClient {
20
32
  * NEVER throws — always returns a JinaConvertResponse (possibly with error).
21
33
  */
22
34
  async convert(request) {
23
- const { url, timeoutMs = DEFAULT_TIMEOUT_MS } = request;
35
+ const {
36
+ url,
37
+ timeoutSeconds = DEFAULT_TIMEOUT_SECONDS,
38
+ proxyUrl,
39
+ noCache = false,
40
+ allowProxyRetry = false
41
+ } = request;
24
42
  try {
25
43
  new URL(url);
26
44
  } catch {
@@ -31,105 +49,116 @@ class JinaClient {
31
49
  error: { code: ErrorCode.INVALID_INPUT, message: `Invalid URL: ${url}`, retryable: false }
32
50
  };
33
51
  }
34
- const jinaUrl = `${JINA_READER_BASE}${url}`;
52
+ const first = await this.convertOnce({
53
+ url,
54
+ timeoutSeconds,
55
+ proxyUrl,
56
+ noCache
57
+ });
58
+ if (!first.error || !allowProxyRetry || proxyUrl || isTerminalReaderError(first.error)) {
59
+ return first;
60
+ }
61
+ mcpLog("warning", `Jina Reader failed for ${url}; retrying with Jina proxy`, "jina");
62
+ return this.convertOnce({
63
+ url,
64
+ timeoutSeconds,
65
+ proxyUrl: "auto",
66
+ noCache: true
67
+ });
68
+ }
69
+ async searchMultiple(queries) {
70
+ const startTime = Date.now();
71
+ if (queries.length === 0) {
72
+ return {
73
+ searches: [],
74
+ totalQueries: 0,
75
+ executionTime: 0,
76
+ error: { code: ErrorCode.INVALID_INPUT, message: "No queries provided", retryable: false }
77
+ };
78
+ }
79
+ if (!this.apiKey) {
80
+ return {
81
+ searches: [],
82
+ totalQueries: queries.length,
83
+ executionTime: Date.now() - startTime,
84
+ error: { code: ErrorCode.AUTH_ERROR, message: "Jina Search requires JINA_API_KEY", retryable: false }
85
+ };
86
+ }
87
+ const searches = await Promise.all(queries.map((query) => this.searchOne(query)));
88
+ const firstError = searches.find((search) => search.error)?.error;
89
+ const allFailed = searches.every((search) => search.error);
90
+ return {
91
+ searches,
92
+ totalQueries: queries.length,
93
+ executionTime: Date.now() - startTime,
94
+ ...allFailed && firstError ? { error: firstError } : {}
95
+ };
96
+ }
97
+ async convertOnce(request) {
35
98
  const headers = {
36
- Accept: "text/markdown"
99
+ Accept: "application/json",
100
+ "Content-Type": "application/json"
37
101
  };
38
- if (this.apiKey) {
39
- headers["Authorization"] = `Bearer ${this.apiKey}`;
102
+ if (this.apiKey) headers["Authorization"] = `Bearer ${this.apiKey}`;
103
+ if (request.proxyUrl && request.proxyUrl !== "auto") {
104
+ headers["X-Proxy-Url"] = request.proxyUrl;
40
105
  }
106
+ const body = {
107
+ url: request.url,
108
+ respondWith: "markdown",
109
+ timeout: request.timeoutSeconds,
110
+ base: "final",
111
+ removeOverlay: true
112
+ };
113
+ if (request.proxyUrl === "auto") body["proxy"] = "auto";
114
+ if (request.noCache) body["noCache"] = true;
115
+ return this.fetchReader(body, headers, request.timeoutSeconds);
116
+ }
117
+ async fetchReader(body, headers, timeoutSeconds) {
41
118
  let lastError;
42
119
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
43
120
  try {
44
- const response = await fetchWithTimeout(jinaUrl, {
45
- method: "GET",
121
+ const response = await fetchWithTimeout(JINA_READER_BASE, {
122
+ method: "POST",
46
123
  headers,
47
- timeoutMs
124
+ body: JSON.stringify(body),
125
+ timeoutMs: (timeoutSeconds + 5) * 1e3
48
126
  });
49
- let content;
50
- try {
51
- content = await response.text();
52
- } catch (readError) {
53
- content = `Failed to read Jina response: ${readError instanceof Error ? readError.message : String(readError)}`;
54
- }
127
+ const raw = await response.text().catch(
128
+ (readError) => `Failed to read Jina response: ${readError instanceof Error ? readError.message : String(readError)}`
129
+ );
55
130
  const usageHeader = response.headers.get("x-usage-tokens");
56
131
  const usageTokens = usageHeader ? Number(usageHeader) : void 0;
132
+ const parsed = parseReaderContent(raw);
57
133
  if (response.ok) {
58
- if (!content.trim()) {
59
- return {
60
- content: "Jina returned an empty body",
61
- statusCode: response.status,
62
- credits: 0,
63
- usageTokens: Number.isFinite(usageTokens) ? usageTokens : void 0,
64
- error: {
65
- code: ErrorCode.UNSUPPORTED_BINARY_CONTENT,
66
- message: "Jina Reader returned empty content for this URL",
67
- retryable: false
68
- }
69
- };
134
+ if (!parsed.content.trim()) {
135
+ return emptyReaderResponse(response.status, usageTokens);
70
136
  }
71
137
  return {
72
- content,
138
+ content: parsed.content,
73
139
  statusCode: response.status,
74
140
  credits: 0,
75
141
  usageTokens: Number.isFinite(usageTokens) ? usageTokens : void 0
76
142
  };
77
143
  }
78
- if (response.status === 401 || response.status === 403) {
79
- return {
80
- content: `Jina auth/quota error (${response.status}): ${content.slice(0, 200)}`,
81
- statusCode: response.status,
82
- credits: 0,
83
- error: {
84
- code: response.status === 401 ? ErrorCode.AUTH_ERROR : ErrorCode.QUOTA_EXCEEDED,
85
- message: response.status === 401 ? "Jina Reader auth failed \u2014 check JINA_API_KEY" : "Jina Reader quota exceeded",
86
- retryable: false,
87
- statusCode: response.status
88
- }
89
- };
90
- }
91
- if (response.status === 404) {
92
- return {
93
- content: `Jina could not fetch the target URL (404)`,
94
- statusCode: 404,
95
- credits: 0,
96
- error: {
97
- code: ErrorCode.NOT_FOUND,
98
- message: "Target URL not reachable by Jina Reader",
99
- retryable: false,
100
- statusCode: 404
101
- }
102
- };
103
- }
104
- if (response.status === 429 || response.status >= 500) {
105
- lastError = classifyError({ status: response.status, message: content.slice(0, 200) });
106
- if (attempt < MAX_RETRIES) {
107
- const delayMs = calculateBackoff(attempt);
108
- mcpLog(
109
- "warning",
110
- `Jina ${response.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delayMs}ms`,
111
- "jina"
112
- );
113
- await sleep(delayMs);
114
- continue;
115
- }
116
- return {
117
- content: `Jina Reader error (${response.status}): ${content.slice(0, 200)}`,
118
- statusCode: response.status,
119
- credits: 0,
120
- error: lastError
121
- };
144
+ const terminal = terminalReaderResponse(response.status, parsed.content || raw);
145
+ if (terminal) return terminal;
146
+ lastError = classifyError({ status: response.status, message: raw.slice(0, 200) });
147
+ if (lastError.retryable && attempt < MAX_RETRIES) {
148
+ const delayMs = calculateBackoff(attempt);
149
+ mcpLog(
150
+ "warning",
151
+ `Jina ${response.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delayMs}ms`,
152
+ "jina"
153
+ );
154
+ await sleep(delayMs);
155
+ continue;
122
156
  }
123
157
  return {
124
- content: `Jina Reader error (${response.status}): ${content.slice(0, 200)}`,
158
+ content: `Jina Reader error (${response.status}): ${raw.slice(0, 200)}`,
125
159
  statusCode: response.status,
126
160
  credits: 0,
127
- error: {
128
- code: ErrorCode.INVALID_INPUT,
129
- message: `Jina Reader returned ${response.status}`,
130
- retryable: false,
131
- statusCode: response.status
132
- }
161
+ error: lastError
133
162
  };
134
163
  } catch (error) {
135
164
  lastError = classifyError(error);
@@ -158,8 +187,161 @@ class JinaClient {
158
187
  error: lastError ?? { code: ErrorCode.UNKNOWN_ERROR, message: "All retries exhausted", retryable: false }
159
188
  };
160
189
  }
190
+ async searchOne(query) {
191
+ const headers = {
192
+ Accept: "application/json",
193
+ Authorization: `Bearer ${this.apiKey ?? ""}`
194
+ };
195
+ try {
196
+ const response = await fetchWithTimeout(buildJinaSearchUrl(query), {
197
+ method: "GET",
198
+ headers,
199
+ timeoutMs: DEFAULT_TIMEOUT_MS
200
+ });
201
+ const raw = await response.text().catch(
202
+ (readError) => `Failed to read Jina Search response: ${readError instanceof Error ? readError.message : String(readError)}`
203
+ );
204
+ if (!response.ok) {
205
+ return {
206
+ query,
207
+ results: [],
208
+ totalResults: 0,
209
+ related: [],
210
+ error: classifyError({ status: response.status, message: raw.slice(0, 200) })
211
+ };
212
+ }
213
+ const results = parseSearchResults(raw);
214
+ return { query, results, totalResults: results.length, related: [] };
215
+ } catch (error) {
216
+ return {
217
+ query,
218
+ results: [],
219
+ totalResults: 0,
220
+ related: [],
221
+ error: classifyError(error)
222
+ };
223
+ }
224
+ }
225
+ }
226
+ function parseReaderContent(raw) {
227
+ try {
228
+ const parsed = JSON.parse(raw);
229
+ const data = readRecord(parsed, "data");
230
+ const content = readString(data, "content");
231
+ if (content) return { content };
232
+ } catch {
233
+ }
234
+ return { content: raw };
235
+ }
236
+ function emptyReaderResponse(statusCode, usageTokens) {
237
+ return {
238
+ content: "Jina returned an empty body",
239
+ statusCode,
240
+ credits: 0,
241
+ usageTokens: Number.isFinite(usageTokens) ? usageTokens : void 0,
242
+ error: {
243
+ code: ErrorCode.UNSUPPORTED_BINARY_CONTENT,
244
+ message: "Jina Reader returned empty content for this URL",
245
+ retryable: false
246
+ }
247
+ };
248
+ }
249
+ function terminalReaderResponse(statusCode, content) {
250
+ if (statusCode === 401 || statusCode === 403) {
251
+ return {
252
+ content: `Jina auth/quota error (${statusCode}): ${content.slice(0, 200)}`,
253
+ statusCode,
254
+ credits: 0,
255
+ error: {
256
+ code: statusCode === 401 ? ErrorCode.AUTH_ERROR : ErrorCode.QUOTA_EXCEEDED,
257
+ message: statusCode === 401 ? "Jina Reader auth failed \u2014 check JINA_API_KEY" : "Jina Reader quota exceeded",
258
+ retryable: false,
259
+ statusCode
260
+ }
261
+ };
262
+ }
263
+ if (statusCode === 404) {
264
+ return {
265
+ content: "Jina could not fetch the target URL (404)",
266
+ statusCode: 404,
267
+ credits: 0,
268
+ error: {
269
+ code: ErrorCode.NOT_FOUND,
270
+ message: "Target URL not reachable by Jina Reader",
271
+ retryable: false,
272
+ statusCode: 404
273
+ }
274
+ };
275
+ }
276
+ if (statusCode >= 400 && statusCode < 500 && statusCode !== 429) {
277
+ return {
278
+ content: `Jina Reader error (${statusCode}): ${content.slice(0, 200)}`,
279
+ statusCode,
280
+ credits: 0,
281
+ error: {
282
+ code: ErrorCode.INVALID_INPUT,
283
+ message: `Jina Reader returned ${statusCode}`,
284
+ retryable: false,
285
+ statusCode
286
+ }
287
+ };
288
+ }
289
+ return null;
290
+ }
291
+ function isTerminalReaderError(error) {
292
+ return !error.retryable && (error.code === ErrorCode.AUTH_ERROR || error.code === ErrorCode.QUOTA_EXCEEDED || error.code === ErrorCode.NOT_FOUND || error.code === ErrorCode.INVALID_INPUT);
293
+ }
294
+ function parseSearchResults(raw) {
295
+ let data;
296
+ try {
297
+ const parsed = JSON.parse(raw);
298
+ data = readUnknown(parsed, "data");
299
+ } catch {
300
+ data = parseMarkdownSearchResults(raw);
301
+ }
302
+ const items = Array.isArray(data) ? data : [];
303
+ return items.map((item, index) => normalizeSearchItem(item, index)).filter((item) => item !== null).slice(0, SEARCH_RESULTS_PER_QUERY);
304
+ }
305
+ function normalizeSearchItem(item, index) {
306
+ const link = readString(item, "url") ?? readString(item, "link");
307
+ if (!link) return null;
308
+ return {
309
+ title: readString(item, "title") || link,
310
+ link,
311
+ snippet: (readString(item, "snippet") || readString(item, "description") || readString(item, "content") || "").slice(0, 500),
312
+ date: readString(item, "date") ?? readString(item, "publishedTime"),
313
+ position: index + 1
314
+ };
315
+ }
316
+ function parseMarkdownSearchResults(raw) {
317
+ const items = [];
318
+ const markdownLink = /\[([^\]]+)\]\((https?:\/\/[^)]+)\)/g;
319
+ let match;
320
+ while ((match = markdownLink.exec(raw)) !== null && items.length < SEARCH_RESULTS_PER_QUERY) {
321
+ const title = match[1];
322
+ const url = match[2];
323
+ if (title && url) items.push({ title, url });
324
+ }
325
+ return items;
326
+ }
327
+ function isRecord(value) {
328
+ return typeof value === "object" && value !== null;
329
+ }
330
+ function readUnknown(value, key) {
331
+ return isRecord(value) ? value[key] : void 0;
332
+ }
333
+ function readRecord(value, key) {
334
+ const child = readUnknown(value, key);
335
+ return isRecord(child) ? child : void 0;
336
+ }
337
+ function readString(value, key) {
338
+ const child = readUnknown(value, key);
339
+ return typeof child === "string" ? child : void 0;
161
340
  }
162
341
  export {
163
- JinaClient
342
+ JinaClient,
343
+ buildJinaSearchUrl,
344
+ buildScrapeDoProxyUrl,
345
+ isTerminalReaderError
164
346
  };
165
347
  //# sourceMappingURL=jina.js.map