mcp-researchpowerpack 6.0.16 → 6.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -14
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/mcp-use.json +2 -2
- package/dist/src/clients/jina.js +262 -80
- package/dist/src/clients/jina.js.map +2 -2
- package/dist/src/clients/kernel.js +142 -0
- package/dist/src/clients/kernel.js.map +7 -0
- package/dist/src/clients/reddit.js.map +1 -1
- package/dist/src/config/index.js +27 -8
- package/dist/src/config/index.js.map +2 -2
- package/dist/src/effect/errors.js +58 -0
- package/dist/src/effect/errors.js.map +7 -0
- package/dist/src/effect/runtime.js +15 -0
- package/dist/src/effect/runtime.js.map +7 -0
- package/dist/src/effect/services.js +97 -0
- package/dist/src/effect/services.js.map +7 -0
- package/dist/src/schemas/scrape-links.js +14 -20
- package/dist/src/schemas/scrape-links.js.map +2 -2
- package/dist/src/schemas/start-research.js +2 -2
- package/dist/src/schemas/start-research.js.map +1 -1
- package/dist/src/schemas/web-search.js +17 -63
- package/dist/src/schemas/web-search.js.map +2 -2
- package/dist/src/services/llm-processor.js +23 -14
- package/dist/src/services/llm-processor.js.map +2 -2
- package/dist/src/tools/registry.js +4 -4
- package/dist/src/tools/registry.js.map +2 -2
- package/dist/src/tools/scrape.js +415 -224
- package/dist/src/tools/scrape.js.map +3 -3
- package/dist/src/tools/search.js +197 -155
- package/dist/src/tools/search.js.map +3 -3
- package/dist/src/tools/start-research.js +38 -19
- package/dist/src/tools/start-research.js.map +2 -2
- package/dist/src/utils/content-quality.js +64 -0
- package/dist/src/utils/content-quality.js.map +7 -0
- package/dist/src/utils/query-relax.js.map +1 -1
- package/dist/src/utils/source-type.js.map +1 -1
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# mcp-researchpowerpack
|
|
4
4
|
|
|
5
|
-
HTTP MCP server for research.
|
|
5
|
+
HTTP MCP server for research. Five tools, orientation-first, built for agents that run multi-pass research loops.
|
|
6
6
|
|
|
7
7
|
Built on [mcp-use](https://github.com/nicepkg/mcp-use). No stdio, HTTP only.
|
|
8
8
|
|
|
@@ -11,8 +11,10 @@ Built on [mcp-use](https://github.com/nicepkg/mcp-use). No stdio, HTTP only.
|
|
|
11
11
|
| tool | what it does | needs |
|
|
12
12
|
|------|-------------|-------|
|
|
13
13
|
| `start-research` | returns a goal-tailored brief: `primary_branch` (reddit / web / both), exact `first_call_sequence`, 25–50 keyword seeds, iteration hints, gaps to watch, stop criteria. Call FIRST every session. | `LLM_API_KEY` + `LLM_BASE_URL` + `LLM_MODEL` for non-degraded brief generation (optional) |
|
|
14
|
-
| `web-search` | parallel
|
|
15
|
-
| `
|
|
14
|
+
| `raw-web-search` | parallel search, up to 50 `keywords` per call. Serper is primary; Jina Search is fallback when Serper is missing, fails, or yields empty query results. Returns the raw ranked markdown list directly. Use for broad discovery, audit trails, and Reddit permalink discovery via explicit `site:reddit.com/r/.../comments` probes. | `SERPER_API_KEY` or `JINA_API_KEY` |
|
|
15
|
+
| `smart-web-search` | parallel search, up to 50 `keywords` per call, plus required `extract`. Serper/Jina provider order matches raw search. Always runs LLM classification and returns tiered markdown (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER) + grounded synthesis + gaps + refine suggestions. Supports `scope: "web" \| "reddit" \| "both"`. | `SERPER_API_KEY` or `JINA_API_KEY` + LLM env |
|
|
16
|
+
| `raw-scrape-links` | fetch URLs in parallel and return full markdown directly. Reddit post permalinks route through the Reddit API with threaded comments. Non-Reddit URLs use Jina Reader first, then Jina Reader through Scrape.do proxy mode, then optional Kernel browser rendering for web pages. | optional `REDDIT_CLIENT_ID` / `REDDIT_CLIENT_SECRET`, `SCRAPEDO_API_KEY`, `JINA_API_KEY`, `KERNEL_API_KEY` |
|
|
17
|
+
| `smart-scrape-links` | same fetch stack as raw scrape, then always runs per-URL LLM extraction with required `extract`. Use for focused evidence packs with `## Source`, `## Matches`, `## Not found`, and `## Follow-up signals`. | raw scrape providers + LLM env |
|
|
16
18
|
|
|
17
19
|
Also exposes `/health` and `health://status`.
|
|
18
20
|
|
|
@@ -20,7 +22,7 @@ Also exposes `/health` and `health://status`.
|
|
|
20
22
|
|
|
21
23
|
Call `start-research` once at the beginning of each session with your goal. The server returns a brief that tells the agent exactly which tool to call first (reddit-first for sentiment/migration, web-first for spec/bug/pricing, both when opinion-heavy AND needs official sources), what keyword seeds to fire, and when to stop.
|
|
22
24
|
|
|
23
|
-
For search fan-out, use bad → better rewrite thinking before calling `web-search`: turn broad phrases like `<feature> support`, `<product> pricing`, `<library> bug fix`, or `<tool> reviews` into source-aware probes such as `site:<official-docs-domain> "<feature>" "<platform-or-version>"`, `site:<vendor-domain> "<product>" pricing "enterprise" OR "free tier"`, `"<exact error text>" "<library-or-package>" "<version>" site:github.com`, or `site:reddit.com/r/<community>/comments "<tool>" "migration" OR "regression"`.
|
|
25
|
+
For search fan-out, use bad → better rewrite thinking before calling `raw-web-search` or `smart-web-search`: turn broad phrases like `<feature> support`, `<product> pricing`, `<library> bug fix`, or `<tool> reviews` into source-aware probes such as `site:<official-docs-domain> "<feature>" "<platform-or-version>"`, `site:<vendor-domain> "<product>" pricing "enterprise" OR "free tier"`, `"<exact error text>" "<library-or-package>" "<version>" site:github.com`, or `site:reddit.com/r/<community>/comments "<tool>" "migration" OR "regression"`.
|
|
24
26
|
|
|
25
27
|
Pair the server with the [`run-research`](https://github.com/yigitkonur/skills-by-yigitkonur/tree/main/skills/run-research) skill for the full agentic playbook:
|
|
26
28
|
|
|
@@ -69,11 +71,13 @@ Copy `.env.example`, set only what you need. Missing keys don't crash the server
|
|
|
69
71
|
|
|
70
72
|
| var | enables |
|
|
71
73
|
|-----|---------|
|
|
72
|
-
| `SERPER_API_KEY` |
|
|
73
|
-
| `SCRAPEDO_API_KEY` |
|
|
74
|
-
| `REDDIT_CLIENT_ID` + `REDDIT_CLIENT_SECRET` |
|
|
75
|
-
| `JINA_API_KEY` |
|
|
76
|
-
| `
|
|
74
|
+
| `SERPER_API_KEY` | primary raw/smart web search provider |
|
|
75
|
+
| `SCRAPEDO_API_KEY` | Scrape.do proxy-mode retry for Jina Reader (`X-Proxy-Url`) |
|
|
76
|
+
| `REDDIT_CLIENT_ID` + `REDDIT_CLIENT_SECRET` | raw/smart scrape for reddit.com permalinks (threaded post + comments) |
|
|
77
|
+
| `JINA_API_KEY` | Jina Search fallback and authenticated Jina Reader requests |
|
|
78
|
+
| `KERNEL_API_KEY` | optional Kernel browser-render fallback after Jina direct + proxy fail |
|
|
79
|
+
| `KERNEL_PROJECT` | optional Kernel project scoping header |
|
|
80
|
+
| `LLM_API_KEY` + `LLM_BASE_URL` + `LLM_MODEL` | goal-tailored brief, `smart-web-search`, `smart-scrape-links` |
|
|
77
81
|
|
|
78
82
|
### llm (AI extraction + classification)
|
|
79
83
|
|
|
@@ -133,11 +137,12 @@ src/
|
|
|
133
137
|
config/ env parsing, capability detection, lazy proxy config
|
|
134
138
|
clients/ provider API clients (serper, reddit, scrapedo, jina)
|
|
135
139
|
tools/
|
|
136
|
-
registry.ts registerAllTools() — wires
|
|
140
|
+
registry.ts registerAllTools() — wires 5 tools
|
|
137
141
|
start-research.ts goal-tailored brief + static playbook
|
|
138
|
-
search.ts
|
|
139
|
-
scrape.ts scrape
|
|
140
|
-
|
|
142
|
+
search.ts raw/smart search handlers (CTR ranking + optional LLM classification)
|
|
143
|
+
scrape.ts raw/smart scrape handlers (Reddit API, Jina Reader, Scrape.do proxy,
|
|
144
|
+
optional Kernel, optional LLM extraction)
|
|
145
|
+
mcp-helpers.ts markdown response builders
|
|
141
146
|
utils.ts shared formatters
|
|
142
147
|
services/
|
|
143
148
|
llm-processor.ts AI extraction, classification, brief generation — primary + fallback model, always low reasoning
|
|
@@ -153,7 +158,7 @@ src/
|
|
|
153
158
|
logger.ts mcpLog() — stderr-only (MCP-safe)
|
|
154
159
|
```
|
|
155
160
|
|
|
156
|
-
Key patterns: capability detection at startup, description-led tool routing (no bootstrap gate),
|
|
161
|
+
Key patterns: capability detection at startup, description-led tool routing (no bootstrap gate), markdown-only MCP tool output, raw/smart tool split, tiered classified output in `smart-web-search`, Reddit API routing in scrape tools, Jina Reader first for non-Reddit URLs, Scrape.do proxy-mode retry through `X-Proxy-Url`, optional Kernel browser-render fallback, bounded concurrency via `p-map`, CTR-based URL ranking, tools never throw (always return `toolFailure`), and structured errors with retry classification.
|
|
157
162
|
|
|
158
163
|
## license
|
|
159
164
|
|
package/dist/index.js
CHANGED
|
@@ -14,7 +14,7 @@ import { getLLMHealth } from "./src/services/llm-processor.js";
|
|
|
14
14
|
import { registerAllTools } from "./src/tools/registry.js";
|
|
15
15
|
const DEFAULT_PORT = 3e3;
|
|
16
16
|
const SHUTDOWN_TIMEOUT_MS = 1e4;
|
|
17
|
-
const WEBSITE_URL = "https://github.com/yigitkonur/mcp-researchpowerpack-http";
|
|
17
|
+
const WEBSITE_URL = "https://github.com/yigitkonur/mcp-researchpowerpack";
|
|
18
18
|
const LOCAL_DEFAULT_HOST = "127.0.0.1";
|
|
19
19
|
const startupLogger = Logger.get("startup");
|
|
20
20
|
function parseCsvEnv(value) {
|
package/dist/index.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../index.ts"],
|
|
4
|
-
"sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack-http' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? 
'', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n // Distinguish \"never probed\" (checkedAt === null) from \"probed and failed\"\n // (checkedAt set, ok=false). The raw `lastPlannerOk` defaults to `false`\n // at startup, which would mislead operators into thinking the LLM is\n // broken before it has been exercised once.\n const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;\n const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? 
null : llm.lastExtractorOk;\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: plannerOkForHealth,\n llm_extractor_ok: extractorOkForHealth,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n // Counter surfacing lets operators diagnose gate behavior from outside\n // the process (see src/tools/start-research.ts for the gate semantics).\n consecutive_planner_failures: llm.consecutivePlannerFailures,\n consecutive_extractor_failures: llm.consecutiveExtractorFailures,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. 
' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model: process.env.LLM_MODEL ?? null,\n extractor_model: process.env.LLM_MODEL ?? 
null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? `http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n 
clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
|
|
4
|
+
"sourcesContent": ["#!/usr/bin/env node\n\n// Expand libuv thread pool for parallel DNS lookups (default 4 is too low for 20+ concurrent connections)\nif (!process.env.UV_THREADPOOL_SIZE) {\n process.env.UV_THREADPOOL_SIZE = '8';\n}\n\nimport { Logger } from 'mcp-use';\nimport {\n InMemorySessionStore,\n InMemoryStreamManager,\n MCPServer,\n object,\n type ServerConfig,\n} from 'mcp-use/server';\n\nimport { SERVER } from './src/config/index.js';\nimport { getLLMHealth } from './src/services/llm-processor.js';\nimport { registerAllTools } from './src/tools/registry.js';\n\nconst DEFAULT_PORT = 3000 as const;\nconst SHUTDOWN_TIMEOUT_MS = 10_000 as const;\nconst WEBSITE_URL = 'https://github.com/yigitkonur/mcp-researchpowerpack' as const;\nconst LOCAL_DEFAULT_HOST = '127.0.0.1' as const;\n\ntype CleanupFn = () => Promise<void>;\n\nconst startupLogger = Logger.get('startup');\n\nfunction parseCsvEnv(value: string | undefined): string[] | undefined {\n if (!value) return undefined;\n\n const parts = value\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n\n return parts.length > 0 ? parts : undefined;\n}\n\nfunction parsePort(value: string | undefined, fallback: number): number {\n const parsed = Number.parseInt(value ?? 
'', 10);\n if (Number.isFinite(parsed) && parsed > 0) {\n return parsed;\n }\n\n return fallback;\n}\n\nfunction resolvePort(): number {\n const portFlagIndex = process.argv.findIndex((arg) => arg === '--port');\n if (portFlagIndex >= 0) {\n return parsePort(process.argv[portFlagIndex + 1], DEFAULT_PORT);\n }\n\n return parsePort(process.env.PORT, DEFAULT_PORT);\n}\n\nfunction resolveHost(): string {\n const explicitHost = process.env.HOST?.trim();\n if (explicitHost) {\n return explicitHost;\n }\n\n // Cloud runtimes typically inject PORT and expect the process to listen on all interfaces.\n if (process.env.PORT?.trim()) {\n return '0.0.0.0';\n }\n\n return LOCAL_DEFAULT_HOST;\n}\n\nfunction buildCors(allowedOrigins: string[] | undefined): ServerConfig['cors'] {\n if (!allowedOrigins || allowedOrigins.length === 0) {\n return undefined;\n }\n\n return {\n origin: allowedOrigins,\n allowMethods: ['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'OPTIONS'],\n allowHeaders: [\n 'Content-Type',\n 'Accept',\n 'Authorization',\n 'mcp-protocol-version',\n 'mcp-session-id',\n 'X-Proxy-Token',\n 'X-Target-URL',\n ],\n exposeHeaders: ['mcp-session-id'],\n };\n}\n\nfunction configureLogging(): void {\n Logger.configure({\n level: process.env.NODE_ENV === 'production' ? 'info' : 'debug',\n format: 'minimal',\n });\n\n const debug = process.env.DEBUG?.trim();\n if (debug === '2') {\n Logger.setDebug(2);\n } else if (debug) {\n Logger.setDebug(1);\n }\n}\n\nfunction normalizeOrigin(value: string, envName: string): string {\n try {\n return new URL(value).origin;\n } catch {\n throw new Error(`${envName} must contain absolute URLs with protocol. 
Received: ${value}`);\n }\n}\n\nfunction resolveAllowedOrigins(): string[] | undefined {\n const explicitOrigins = parseCsvEnv(process.env.ALLOWED_ORIGINS);\n if (explicitOrigins && explicitOrigins.length > 0) {\n return explicitOrigins.map(origin => normalizeOrigin(origin, 'ALLOWED_ORIGINS'));\n }\n\n return undefined;\n}\n\nfunction buildSessionConfig(): {\n sessionConfig: Pick<ServerConfig, 'sessionStore' | 'streamManager'>;\n cleanupFns: CleanupFn[];\n} {\n return {\n sessionConfig: {\n sessionStore: new InMemorySessionStore(),\n streamManager: new InMemoryStreamManager(),\n },\n cleanupFns: [],\n };\n}\n\nfunction buildHealthPayload(server: MCPServer, startedAt: number) {\n const llm = getLLMHealth();\n // Distinguish \"never probed\" (checkedAt === null) from \"probed and failed\"\n // (checkedAt set, ok=false). The raw `lastPlannerOk` defaults to `false`\n // at startup, which would mislead operators into thinking the LLM is\n // broken before it has been exercised once.\n const plannerOkForHealth = llm.lastPlannerCheckedAt === null ? null : llm.lastPlannerOk;\n const extractorOkForHealth = llm.lastExtractorCheckedAt === null ? 
null : llm.lastExtractorOk;\n return {\n status: 'ok',\n name: SERVER.NAME,\n version: SERVER.VERSION,\n transport: 'http',\n uptime_seconds: Math.floor((Date.now() - startedAt) / 1000),\n active_sessions: server.getActiveSessions().length,\n llm_planner_ok: plannerOkForHealth,\n llm_extractor_ok: extractorOkForHealth,\n llm_planner_checked_at: llm.lastPlannerCheckedAt,\n llm_extractor_checked_at: llm.lastExtractorCheckedAt,\n llm_planner_error: llm.lastPlannerError,\n llm_extractor_error: llm.lastExtractorError,\n planner_configured: llm.plannerConfigured,\n extractor_configured: llm.extractorConfigured,\n // Counter surfacing lets operators diagnose gate behavior from outside\n // the process (see src/tools/start-research.ts for the gate semantics).\n consecutive_planner_failures: llm.consecutivePlannerFailures,\n consecutive_extractor_failures: llm.consecutiveExtractorFailures,\n timestamp: new Date().toISOString(),\n };\n}\n\nasync function main(): Promise<void> {\n configureLogging();\n\n const isProduction = process.env.NODE_ENV === 'production';\n const host = resolveHost();\n const port = resolvePort();\n const baseUrl = process.env.MCP_URL?.trim() || undefined;\n const allowedOrigins = resolveAllowedOrigins();\n\n const { sessionConfig, cleanupFns } = buildSessionConfig();\n\n startupLogger.info(`Starting ${SERVER.NAME} v${SERVER.VERSION}`);\n startupLogger.info(`Binding HTTP server to ${host}:${port}`);\n if (allowedOrigins && allowedOrigins.length > 0) {\n startupLogger.info(`Host validation enabled for origins: ${allowedOrigins.join(', ')}`);\n } else if (isProduction) {\n if (!baseUrl) {\n startupLogger.error(\n 'Production mode requires ALLOWED_ORIGINS or MCP_URL to be set. ' +\n 'Without host validation, the server is vulnerable to DNS rebinding attacks. ' +\n 'Set ALLOWED_ORIGINS to the public deployment URL or custom domain.',\n );\n process.exit(1);\n }\n startupLogger.warn(\n 'Host validation is disabled because ALLOWED_ORIGINS is not set. 
' +\n 'MCP_URL is set, so the server will start \u2014 but set ALLOWED_ORIGINS for full origin protection.',\n );\n } else {\n startupLogger.info('Host validation disabled for local development');\n }\n\n const server = new MCPServer({\n name: SERVER.NAME,\n title: 'Research Powerpack',\n version: SERVER.VERSION,\n description: SERVER.DESCRIPTION,\n websiteUrl: WEBSITE_URL,\n host,\n baseUrl,\n cors: buildCors(allowedOrigins),\n allowedOrigins,\n ...sessionConfig,\n });\n\n registerAllTools(server);\n\n // Advertise our LLM-augmentation capability via the MCP `experimental`\n // namespace so capability-aware clients can branch at initialize-time\n // instead of parsing per-call footers. mcp-use creates a fresh native MCP\n // server per session via `getServerForSession()`, so we patch that factory\n // to register our experimental capability on every session. The capability\n // values are read fresh on each session so health flips are observable.\n // See: docs/code-review/context/06-mcp-use-best-practices-primer.md (#3, #6).\n try {\n type Native = { server?: { registerCapabilities?: (caps: Record<string, unknown>) => void } };\n type Patched = { getServerForSession?: (sessionId?: string) => Native };\n const patched = server as unknown as Patched;\n const original = patched.getServerForSession?.bind(server);\n if (original) {\n patched.getServerForSession = (sessionId?: string): Native => {\n const native = original(sessionId);\n try {\n const llm = getLLMHealth();\n native.server?.registerCapabilities?.({\n experimental: {\n research_powerpack: {\n planner_available: llm.plannerConfigured,\n extractor_available: llm.extractorConfigured,\n planner_model: process.env.LLM_MODEL ?? null,\n extractor_model: process.env.LLM_MODEL ?? 
null,\n },\n },\n });\n } catch {\n // Capability registration is advisory; never block session creation.\n }\n return native;\n };\n }\n } catch (err) {\n startupLogger.warn(`Could not patch session-server factory: ${String(err)}`);\n }\n\n const startedAt = Date.now();\n\n server.get('/health', (c) => c.json(buildHealthPayload(server, startedAt)));\n server.get('/healthz', (c) => c.json(buildHealthPayload(server, startedAt)));\n\n // Some MCP clients (Claude Desktop, Cursor, VS Code) proactively probe\n // /.well-known/oauth-protected-resource before receiving any 401, per the\n // MCP 2025-03-26 spec. Without these routes the server returns 404 and some\n // clients surface a spurious \"authentication required\" error. A minimal PRM\n // response with no authorization_servers field explicitly signals that this\n // server requires no authentication.\n const resourceBaseUrl = baseUrl ?? `http://${host}:${port}`;\n server.get('/.well-known/oauth-protected-resource', (c) =>\n c.json({ resource: resourceBaseUrl }),\n );\n server.get('/.well-known/oauth-protected-resource/mcp', (c) =>\n c.json({ resource: `${resourceBaseUrl}/mcp` }),\n );\n\n server.resource(\n {\n name: 'server-health',\n uri: 'health://status',\n description: 'Current server health, uptime, and active MCP session count.',\n mimeType: 'application/json',\n },\n async () => object(buildHealthPayload(server, startedAt)),\n );\n\n let isShuttingDown = false;\n\n async function shutdown(signal: string, exitCode: number): Promise<void> {\n if (isShuttingDown) return;\n isShuttingDown = true;\n\n const forceExit = setTimeout(() => {\n startupLogger.error(`Forced exit after ${SHUTDOWN_TIMEOUT_MS}ms (${signal})`);\n process.exit(1);\n }, SHUTDOWN_TIMEOUT_MS);\n\n try {\n startupLogger.warn(`Shutdown signal received: ${signal}`);\n await server.close();\n\n for (const cleanupFn of cleanupFns) {\n await cleanupFn();\n }\n\n clearTimeout(forceExit);\n process.exit(exitCode);\n } catch (error) {\n 
clearTimeout(forceExit);\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Error while stopping server: ${message}`);\n process.exit(1);\n }\n }\n\n process.on('SIGTERM', () => {\n void shutdown('SIGTERM', 0);\n });\n\n process.on('SIGINT', () => {\n void shutdown('SIGINT', 0);\n });\n\n process.on('uncaughtException', (error) => {\n startupLogger.error(`Uncaught exception: ${error.stack ?? error.message}`);\n void shutdown('uncaughtException', 1);\n });\n\n process.on('unhandledRejection', (reason) => {\n startupLogger.error(`Unhandled rejection: ${String(reason)}`);\n void shutdown('unhandledRejection', 1);\n });\n\n await server.listen(port);\n\n startupLogger.info(`${SERVER.NAME} v${SERVER.VERSION} listening on http://${host}:${port}/mcp`);\n}\n\nvoid main().catch((error) => {\n const message = error instanceof Error ? (error.stack ?? error.message) : String(error);\n startupLogger.error(`Server failed to start: ${message}`);\n process.exit(1);\n});\n"],
|
|
5
5
|
"mappings": ";AAGA,IAAI,CAAC,QAAQ,IAAI,oBAAoB;AACnC,UAAQ,IAAI,qBAAqB;AACnC;AAEA,SAAS,cAAc;AACvB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AAEP,SAAS,cAAc;AACvB,SAAS,oBAAoB;AAC7B,SAAS,wBAAwB;AAEjC,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAC5B,MAAM,cAAc;AACpB,MAAM,qBAAqB;AAI3B,MAAM,gBAAgB,OAAO,IAAI,SAAS;AAE1C,SAAS,YAAY,OAAiD;AACpE,MAAI,CAAC,MAAO,QAAO;AAEnB,QAAM,QAAQ,MACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AAEjB,SAAO,MAAM,SAAS,IAAI,QAAQ;AACpC;AAEA,SAAS,UAAU,OAA2B,UAA0B;AACtE,QAAM,SAAS,OAAO,SAAS,SAAS,IAAI,EAAE;AAC9C,MAAI,OAAO,SAAS,MAAM,KAAK,SAAS,GAAG;AACzC,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,cAAsB;AAC7B,QAAM,gBAAgB,QAAQ,KAAK,UAAU,CAAC,QAAQ,QAAQ,QAAQ;AACtE,MAAI,iBAAiB,GAAG;AACtB,WAAO,UAAU,QAAQ,KAAK,gBAAgB,CAAC,GAAG,YAAY;AAAA,EAChE;AAEA,SAAO,UAAU,QAAQ,IAAI,MAAM,YAAY;AACjD;AAEA,SAAS,cAAsB;AAC7B,QAAM,eAAe,QAAQ,IAAI,MAAM,KAAK;AAC5C,MAAI,cAAc;AAChB,WAAO;AAAA,EACT;AAGA,MAAI,QAAQ,IAAI,MAAM,KAAK,GAAG;AAC5B,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAEA,SAAS,UAAU,gBAA4D;AAC7E,MAAI,CAAC,kBAAkB,eAAe,WAAW,GAAG;AAClD,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,cAAc,CAAC,OAAO,QAAQ,QAAQ,OAAO,UAAU,SAAS;AAAA,IAChE,cAAc;AAAA,MACZ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,eAAe,CAAC,gBAAgB;AAAA,EAClC;AACF;AAEA,SAAS,mBAAyB;AAChC,SAAO,UAAU;AAAA,IACf,OAAO,QAAQ,IAAI,aAAa,eAAe,SAAS;AAAA,IACxD,QAAQ;AAAA,EACV,CAAC;AAED,QAAM,QAAQ,QAAQ,IAAI,OAAO,KAAK;AACtC,MAAI,UAAU,KAAK;AACjB,WAAO,SAAS,CAAC;AAAA,EACnB,WAAW,OAAO;AAChB,WAAO,SAAS,CAAC;AAAA,EACnB;AACF;AAEA,SAAS,gBAAgB,OAAe,SAAyB;AAC/D,MAAI;AACF,WAAO,IAAI,IAAI,KAAK,EAAE;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI,MAAM,GAAG,OAAO,wDAAwD,KAAK,EAAE;AAAA,EAC3F;AACF;AAEA,SAAS,wBAA8C;AACrD,QAAM,kBAAkB,YAAY,QAAQ,IAAI,eAAe;AAC/D,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,WAAO,gBAAgB,IAAI,YAAU,gBAAgB,QAAQ,iBAAiB,CAAC;AAAA,EACjF;AAEA,SAAO;AACT;AAEA,SAAS,qBAGP;AACA,SAAO;AAAA,IACL,eAAe;AAAA,MACb,cAAc,IAAI,qBAAqB;AAAA,MACvC,eAAe,IAAI,sBAAsB;AAAA,IAC3C;AAAA,IACA,YAAY,CAAC;AAAA,EACf;AACF;AAEA,SAAS,mBAAmB,QAAmB,WAAmB;AAChE,QAAM,MAAM,a
AAa;AAKzB,QAAM,qBAAqB,IAAI,yBAAyB,OAAO,OAAO,IAAI;AAC1E,QAAM,uBAAuB,IAAI,2BAA2B,OAAO,OAAO,IAAI;AAC9E,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,MAAM,OAAO;AAAA,IACb,SAAS,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,gBAAgB,KAAK,OAAO,KAAK,IAAI,IAAI,aAAa,GAAI;AAAA,IAC1D,iBAAiB,OAAO,kBAAkB,EAAE;AAAA,IAC5C,gBAAgB;AAAA,IAChB,kBAAkB;AAAA,IAClB,wBAAwB,IAAI;AAAA,IAC5B,0BAA0B,IAAI;AAAA,IAC9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,oBAAoB,IAAI;AAAA,IACxB,sBAAsB,IAAI;AAAA;AAAA;AAAA,IAG1B,8BAA8B,IAAI;AAAA,IAClC,gCAAgC,IAAI;AAAA,IACpC,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,EACpC;AACF;AAEA,eAAe,OAAsB;AACnC,mBAAiB;AAEjB,QAAM,eAAe,QAAQ,IAAI,aAAa;AAC9C,QAAM,OAAO,YAAY;AACzB,QAAM,OAAO,YAAY;AACzB,QAAM,UAAU,QAAQ,IAAI,SAAS,KAAK,KAAK;AAC/C,QAAM,iBAAiB,sBAAsB;AAE7C,QAAM,EAAE,eAAe,WAAW,IAAI,mBAAmB;AAEzD,gBAAc,KAAK,YAAY,OAAO,IAAI,KAAK,OAAO,OAAO,EAAE;AAC/D,gBAAc,KAAK,0BAA0B,IAAI,IAAI,IAAI,EAAE;AAC3D,MAAI,kBAAkB,eAAe,SAAS,GAAG;AAC/C,kBAAc,KAAK,wCAAwC,eAAe,KAAK,IAAI,CAAC,EAAE;AAAA,EACxF,WAAW,cAAc;AACvB,QAAI,CAAC,SAAS;AACZ,oBAAc;AAAA,QACZ;AAAA,MAGF;AACA,cAAQ,KAAK,CAAC;AAAA,IAChB;AACA,kBAAc;AAAA,MACZ;AAAA,IAEF;AAAA,EACF,OAAO;AACL,kBAAc,KAAK,gDAAgD;AAAA,EACrE;AAEA,QAAM,SAAS,IAAI,UAAU;AAAA,IAC3B,MAAM,OAAO;AAAA,IACb,OAAO;AAAA,IACP,SAAS,OAAO;AAAA,IAChB,aAAa,OAAO;AAAA,IACpB,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA,MAAM,UAAU,cAAc;AAAA,IAC9B;AAAA,IACA,GAAG;AAAA,EACL,CAAC;AAED,mBAAiB,MAAM;AASvB,MAAI;AAGF,UAAM,UAAU;AAChB,UAAM,WAAW,QAAQ,qBAAqB,KAAK,MAAM;AACzD,QAAI,UAAU;AACZ,cAAQ,sBAAsB,CAAC,cAA+B;AAC5D,cAAM,SAAS,SAAS,SAAS;AACjC,YAAI;AACF,gBAAM,MAAM,aAAa;AACzB,iBAAO,QAAQ,uBAAuB;AAAA,YACpC,cAAc;AAAA,cACZ,oBAAoB;AAAA,gBAClB,mBAAmB,IAAI;AAAA,gBACvB,qBAAqB,IAAI;AAAA,gBACzB,eAAe,QAAQ,IAAI,aAAa;AAAA,gBACxC,iBAAiB,QAAQ,IAAI,aAAa;AAAA,cAC5C;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AACA,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF,SAAS,KAAK;AACZ,kBAAc,KAAK,2CAA2C,OAAO,GAAG,CAAC,EAAE;AAAA,EAC7E;AAEA,QAAM,YAAY,KAAK,IAAI;AAE3B,SAAO,IAAI,WAAW,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AAC1E,SAAO,IAAI,YAAY,CAAC,MAAM,EAAE,KAAK,mBAAmB,QAAQ,SAAS,CAAC,CAAC;AA
Q3E,QAAM,kBAAkB,WAAW,UAAU,IAAI,IAAI,IAAI;AACzD,SAAO;AAAA,IAAI;AAAA,IAAyC,CAAC,MACnD,EAAE,KAAK,EAAE,UAAU,gBAAgB,CAAC;AAAA,EACtC;AACA,SAAO;AAAA,IAAI;AAAA,IAA6C,CAAC,MACvD,EAAE,KAAK,EAAE,UAAU,GAAG,eAAe,OAAO,CAAC;AAAA,EAC/C;AAEA,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,KAAK;AAAA,MACL,aAAa;AAAA,MACb,UAAU;AAAA,IACZ;AAAA,IACA,YAAY,OAAO,mBAAmB,QAAQ,SAAS,CAAC;AAAA,EAC1D;AAEA,MAAI,iBAAiB;AAErB,iBAAe,SAAS,QAAgB,UAAiC;AACvE,QAAI,eAAgB;AACpB,qBAAiB;AAEjB,UAAM,YAAY,WAAW,MAAM;AACjC,oBAAc,MAAM,qBAAqB,mBAAmB,OAAO,MAAM,GAAG;AAC5E,cAAQ,KAAK,CAAC;AAAA,IAChB,GAAG,mBAAmB;AAEtB,QAAI;AACF,oBAAc,KAAK,6BAA6B,MAAM,EAAE;AACxD,YAAM,OAAO,MAAM;AAEnB,iBAAW,aAAa,YAAY;AAClC,cAAM,UAAU;AAAA,MAClB;AAEA,mBAAa,SAAS;AACtB,cAAQ,KAAK,QAAQ;AAAA,IACvB,SAAS,OAAO;AACd,mBAAa,SAAS;AACtB,YAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,oBAAc,MAAM,gCAAgC,OAAO,EAAE;AAC7D,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF;AAEA,UAAQ,GAAG,WAAW,MAAM;AAC1B,SAAK,SAAS,WAAW,CAAC;AAAA,EAC5B,CAAC;AAED,UAAQ,GAAG,UAAU,MAAM;AACzB,SAAK,SAAS,UAAU,CAAC;AAAA,EAC3B,CAAC;AAED,UAAQ,GAAG,qBAAqB,CAAC,UAAU;AACzC,kBAAc,MAAM,uBAAuB,MAAM,SAAS,MAAM,OAAO,EAAE;AACzE,SAAK,SAAS,qBAAqB,CAAC;AAAA,EACtC,CAAC;AAED,UAAQ,GAAG,sBAAsB,CAAC,WAAW;AAC3C,kBAAc,MAAM,wBAAwB,OAAO,MAAM,CAAC,EAAE;AAC5D,SAAK,SAAS,sBAAsB,CAAC;AAAA,EACvC,CAAC;AAED,QAAM,OAAO,OAAO,IAAI;AAExB,gBAAc,KAAK,GAAG,OAAO,IAAI,KAAK,OAAO,OAAO,wBAAwB,IAAI,IAAI,IAAI,MAAM;AAChG;AAEA,KAAK,KAAK,EAAE,MAAM,CAAC,UAAU;AAC3B,QAAM,UAAU,iBAAiB,QAAS,MAAM,SAAS,MAAM,UAAW,OAAO,KAAK;AACtF,gBAAc,MAAM,2BAA2B,OAAO,EAAE;AACxD,UAAQ,KAAK,CAAC;AAChB,CAAC;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/mcp-use.json
CHANGED
package/dist/src/clients/jina.js
CHANGED
|
@@ -7,8 +7,20 @@ import {
|
|
|
7
7
|
import { calculateBackoff } from "../utils/retry.js";
|
|
8
8
|
import { mcpLog } from "../utils/logger.js";
|
|
9
9
|
const JINA_READER_BASE = "https://r.jina.ai/";
|
|
10
|
-
const
|
|
10
|
+
const JINA_SEARCH_BASE = "https://s.jina.ai/";
|
|
11
|
+
const DEFAULT_TIMEOUT_SECONDS = 15;
|
|
12
|
+
const DEFAULT_TIMEOUT_MS = DEFAULT_TIMEOUT_SECONDS * 1e3;
|
|
11
13
|
const MAX_RETRIES = 2;
|
|
14
|
+
const SEARCH_RESULTS_PER_QUERY = 10;
|
|
15
|
+
/**
 * Build the Jina Search request URL for one query.
 *
 * @param {string} query - Search text; it is form-encoded into the `q` parameter.
 * @returns {string} `JINA_SEARCH_BASE` followed by `?q=<encoded query>`.
 */
function buildJinaSearchUrl(query) {
  const search = new URLSearchParams();
  search.set("q", query);
  return JINA_SEARCH_BASE + "?" + search.toString();
}
|
|
19
|
+
/**
 * Build a proxy URL for proxy.scrape.do:8080 with the token in the username
 * position and `parameters` in the password position of the URL.
 *
 * @param {string} token - API token; surrounding whitespace is ignored and the
 *   value is percent-encoded.
 * @param {string} [parameters="render=false"] - Options string inserted verbatim
 *   after the colon (not encoded here).
 * @returns {string} The proxy URL, or "" when no usable token is supplied.
 */
function buildScrapeDoProxyUrl(token, parameters = "render=false") {
  // A missing token (undefined/null/non-string) means "no proxy configured";
  // treat it like a blank token instead of throwing on `.trim()`.
  const trimmed = typeof token === "string" ? token.trim() : "";
  if (!trimmed) return "";
  return `http://${encodeURIComponent(trimmed)}:${parameters}@proxy.scrape.do:8080`;
}
|
|
12
24
|
class JinaClient {
|
|
13
25
|
apiKey;
|
|
14
26
|
constructor(apiKey) {
|
|
@@ -20,7 +32,13 @@ class JinaClient {
|
|
|
20
32
|
* NEVER throws — always returns a JinaConvertResponse (possibly with error).
|
|
21
33
|
*/
|
|
22
34
|
async convert(request) {
|
|
23
|
-
const {
|
|
35
|
+
const {
|
|
36
|
+
url,
|
|
37
|
+
timeoutSeconds = DEFAULT_TIMEOUT_SECONDS,
|
|
38
|
+
proxyUrl,
|
|
39
|
+
noCache = false,
|
|
40
|
+
allowProxyRetry = false
|
|
41
|
+
} = request;
|
|
24
42
|
try {
|
|
25
43
|
new URL(url);
|
|
26
44
|
} catch {
|
|
@@ -31,105 +49,116 @@ class JinaClient {
|
|
|
31
49
|
error: { code: ErrorCode.INVALID_INPUT, message: `Invalid URL: ${url}`, retryable: false }
|
|
32
50
|
};
|
|
33
51
|
}
|
|
34
|
-
const
|
|
52
|
+
const first = await this.convertOnce({
|
|
53
|
+
url,
|
|
54
|
+
timeoutSeconds,
|
|
55
|
+
proxyUrl,
|
|
56
|
+
noCache
|
|
57
|
+
});
|
|
58
|
+
if (!first.error || !allowProxyRetry || proxyUrl || isTerminalReaderError(first.error)) {
|
|
59
|
+
return first;
|
|
60
|
+
}
|
|
61
|
+
mcpLog("warning", `Jina Reader failed for ${url}; retrying with Jina proxy`, "jina");
|
|
62
|
+
return this.convertOnce({
|
|
63
|
+
url,
|
|
64
|
+
timeoutSeconds,
|
|
65
|
+
proxyUrl: "auto",
|
|
66
|
+
noCache: true
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
async searchMultiple(queries) {
|
|
70
|
+
const startTime = Date.now();
|
|
71
|
+
if (queries.length === 0) {
|
|
72
|
+
return {
|
|
73
|
+
searches: [],
|
|
74
|
+
totalQueries: 0,
|
|
75
|
+
executionTime: 0,
|
|
76
|
+
error: { code: ErrorCode.INVALID_INPUT, message: "No queries provided", retryable: false }
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
if (!this.apiKey) {
|
|
80
|
+
return {
|
|
81
|
+
searches: [],
|
|
82
|
+
totalQueries: queries.length,
|
|
83
|
+
executionTime: Date.now() - startTime,
|
|
84
|
+
error: { code: ErrorCode.AUTH_ERROR, message: "Jina Search requires JINA_API_KEY", retryable: false }
|
|
85
|
+
};
|
|
86
|
+
}
|
|
87
|
+
const searches = await Promise.all(queries.map((query) => this.searchOne(query)));
|
|
88
|
+
const firstError = searches.find((search) => search.error)?.error;
|
|
89
|
+
const allFailed = searches.every((search) => search.error);
|
|
90
|
+
return {
|
|
91
|
+
searches,
|
|
92
|
+
totalQueries: queries.length,
|
|
93
|
+
executionTime: Date.now() - startTime,
|
|
94
|
+
...allFailed && firstError ? { error: firstError } : {}
|
|
95
|
+
};
|
|
96
|
+
}
|
|
97
|
+
async convertOnce(request) {
|
|
35
98
|
const headers = {
|
|
36
|
-
Accept: "
|
|
99
|
+
Accept: "application/json",
|
|
100
|
+
"Content-Type": "application/json"
|
|
37
101
|
};
|
|
38
|
-
if (this.apiKey) {
|
|
39
|
-
|
|
102
|
+
if (this.apiKey) headers["Authorization"] = `Bearer ${this.apiKey}`;
|
|
103
|
+
if (request.proxyUrl && request.proxyUrl !== "auto") {
|
|
104
|
+
headers["X-Proxy-Url"] = request.proxyUrl;
|
|
40
105
|
}
|
|
106
|
+
const body = {
|
|
107
|
+
url: request.url,
|
|
108
|
+
respondWith: "markdown",
|
|
109
|
+
timeout: request.timeoutSeconds,
|
|
110
|
+
base: "final",
|
|
111
|
+
removeOverlay: true
|
|
112
|
+
};
|
|
113
|
+
if (request.proxyUrl === "auto") body["proxy"] = "auto";
|
|
114
|
+
if (request.noCache) body["noCache"] = true;
|
|
115
|
+
return this.fetchReader(body, headers, request.timeoutSeconds);
|
|
116
|
+
}
|
|
117
|
+
async fetchReader(body, headers, timeoutSeconds) {
|
|
41
118
|
let lastError;
|
|
42
119
|
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
43
120
|
try {
|
|
44
|
-
const response = await fetchWithTimeout(
|
|
45
|
-
method: "
|
|
121
|
+
const response = await fetchWithTimeout(JINA_READER_BASE, {
|
|
122
|
+
method: "POST",
|
|
46
123
|
headers,
|
|
47
|
-
|
|
124
|
+
body: JSON.stringify(body),
|
|
125
|
+
timeoutMs: (timeoutSeconds + 5) * 1e3
|
|
48
126
|
});
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
} catch (readError) {
|
|
53
|
-
content = `Failed to read Jina response: ${readError instanceof Error ? readError.message : String(readError)}`;
|
|
54
|
-
}
|
|
127
|
+
const raw = await response.text().catch(
|
|
128
|
+
(readError) => `Failed to read Jina response: ${readError instanceof Error ? readError.message : String(readError)}`
|
|
129
|
+
);
|
|
55
130
|
const usageHeader = response.headers.get("x-usage-tokens");
|
|
56
131
|
const usageTokens = usageHeader ? Number(usageHeader) : void 0;
|
|
132
|
+
const parsed = parseReaderContent(raw);
|
|
57
133
|
if (response.ok) {
|
|
58
|
-
if (!content.trim()) {
|
|
59
|
-
return
|
|
60
|
-
content: "Jina returned an empty body",
|
|
61
|
-
statusCode: response.status,
|
|
62
|
-
credits: 0,
|
|
63
|
-
usageTokens: Number.isFinite(usageTokens) ? usageTokens : void 0,
|
|
64
|
-
error: {
|
|
65
|
-
code: ErrorCode.UNSUPPORTED_BINARY_CONTENT,
|
|
66
|
-
message: "Jina Reader returned empty content for this URL",
|
|
67
|
-
retryable: false
|
|
68
|
-
}
|
|
69
|
-
};
|
|
134
|
+
if (!parsed.content.trim()) {
|
|
135
|
+
return emptyReaderResponse(response.status, usageTokens);
|
|
70
136
|
}
|
|
71
137
|
return {
|
|
72
|
-
content,
|
|
138
|
+
content: parsed.content,
|
|
73
139
|
statusCode: response.status,
|
|
74
140
|
credits: 0,
|
|
75
141
|
usageTokens: Number.isFinite(usageTokens) ? usageTokens : void 0
|
|
76
142
|
};
|
|
77
143
|
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
}
|
|
91
|
-
if (response.status === 404) {
|
|
92
|
-
return {
|
|
93
|
-
content: `Jina could not fetch the target URL (404)`,
|
|
94
|
-
statusCode: 404,
|
|
95
|
-
credits: 0,
|
|
96
|
-
error: {
|
|
97
|
-
code: ErrorCode.NOT_FOUND,
|
|
98
|
-
message: "Target URL not reachable by Jina Reader",
|
|
99
|
-
retryable: false,
|
|
100
|
-
statusCode: 404
|
|
101
|
-
}
|
|
102
|
-
};
|
|
103
|
-
}
|
|
104
|
-
if (response.status === 429 || response.status >= 500) {
|
|
105
|
-
lastError = classifyError({ status: response.status, message: content.slice(0, 200) });
|
|
106
|
-
if (attempt < MAX_RETRIES) {
|
|
107
|
-
const delayMs = calculateBackoff(attempt);
|
|
108
|
-
mcpLog(
|
|
109
|
-
"warning",
|
|
110
|
-
`Jina ${response.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delayMs}ms`,
|
|
111
|
-
"jina"
|
|
112
|
-
);
|
|
113
|
-
await sleep(delayMs);
|
|
114
|
-
continue;
|
|
115
|
-
}
|
|
116
|
-
return {
|
|
117
|
-
content: `Jina Reader error (${response.status}): ${content.slice(0, 200)}`,
|
|
118
|
-
statusCode: response.status,
|
|
119
|
-
credits: 0,
|
|
120
|
-
error: lastError
|
|
121
|
-
};
|
|
144
|
+
const terminal = terminalReaderResponse(response.status, parsed.content || raw);
|
|
145
|
+
if (terminal) return terminal;
|
|
146
|
+
lastError = classifyError({ status: response.status, message: raw.slice(0, 200) });
|
|
147
|
+
if (lastError.retryable && attempt < MAX_RETRIES) {
|
|
148
|
+
const delayMs = calculateBackoff(attempt);
|
|
149
|
+
mcpLog(
|
|
150
|
+
"warning",
|
|
151
|
+
`Jina ${response.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1}. Retrying in ${delayMs}ms`,
|
|
152
|
+
"jina"
|
|
153
|
+
);
|
|
154
|
+
await sleep(delayMs);
|
|
155
|
+
continue;
|
|
122
156
|
}
|
|
123
157
|
return {
|
|
124
|
-
content: `Jina Reader error (${response.status}): ${
|
|
158
|
+
content: `Jina Reader error (${response.status}): ${raw.slice(0, 200)}`,
|
|
125
159
|
statusCode: response.status,
|
|
126
160
|
credits: 0,
|
|
127
|
-
error:
|
|
128
|
-
code: ErrorCode.INVALID_INPUT,
|
|
129
|
-
message: `Jina Reader returned ${response.status}`,
|
|
130
|
-
retryable: false,
|
|
131
|
-
statusCode: response.status
|
|
132
|
-
}
|
|
161
|
+
error: lastError
|
|
133
162
|
};
|
|
134
163
|
} catch (error) {
|
|
135
164
|
lastError = classifyError(error);
|
|
@@ -158,8 +187,161 @@ class JinaClient {
|
|
|
158
187
|
error: lastError ?? { code: ErrorCode.UNKNOWN_ERROR, message: "All retries exhausted", retryable: false }
|
|
159
188
|
};
|
|
160
189
|
}
|
|
190
|
+
async searchOne(query) {
|
|
191
|
+
const headers = {
|
|
192
|
+
Accept: "application/json",
|
|
193
|
+
Authorization: `Bearer ${this.apiKey ?? ""}`
|
|
194
|
+
};
|
|
195
|
+
try {
|
|
196
|
+
const response = await fetchWithTimeout(buildJinaSearchUrl(query), {
|
|
197
|
+
method: "GET",
|
|
198
|
+
headers,
|
|
199
|
+
timeoutMs: DEFAULT_TIMEOUT_MS
|
|
200
|
+
});
|
|
201
|
+
const raw = await response.text().catch(
|
|
202
|
+
(readError) => `Failed to read Jina Search response: ${readError instanceof Error ? readError.message : String(readError)}`
|
|
203
|
+
);
|
|
204
|
+
if (!response.ok) {
|
|
205
|
+
return {
|
|
206
|
+
query,
|
|
207
|
+
results: [],
|
|
208
|
+
totalResults: 0,
|
|
209
|
+
related: [],
|
|
210
|
+
error: classifyError({ status: response.status, message: raw.slice(0, 200) })
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
const results = parseSearchResults(raw);
|
|
214
|
+
return { query, results, totalResults: results.length, related: [] };
|
|
215
|
+
} catch (error) {
|
|
216
|
+
return {
|
|
217
|
+
query,
|
|
218
|
+
results: [],
|
|
219
|
+
totalResults: 0,
|
|
220
|
+
related: [],
|
|
221
|
+
error: classifyError(error)
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
/**
 * Extract the page content from a Jina Reader response body.
 *
 * When the body is JSON with a string at `data.content`, that string is
 * returned (including the empty string, so callers can detect an empty page).
 * Otherwise the raw body is returned unchanged.
 *
 * @param {string} raw - Response body text.
 * @returns {{ content: string }} The extracted content, or the raw body as a fallback.
 */
function parseReaderContent(raw) {
  try {
    const content = readString(readRecord(JSON.parse(raw), "data"), "content");
    // An empty `data.content` is a real answer ("the page had no content"),
    // not a reason to hand the JSON envelope back as if it were page text.
    if (content !== undefined) return { content };
  } catch {
    // Body is not JSON; fall through and return it verbatim.
  }
  return { content: raw };
}
|
|
236
|
+
/**
 * Build the response returned when Jina Reader answers successfully but with
 * no content.
 *
 * @param {number} statusCode - HTTP status of the Reader response.
 * @param {number|undefined} usageTokens - Parsed usage-token count; dropped
 *   unless it is a finite number.
 * @returns {object} Response carrying a non-retryable
 *   `ErrorCode.UNSUPPORTED_BINARY_CONTENT` error.
 */
function emptyReaderResponse(statusCode, usageTokens) {
  const reportedTokens = Number.isFinite(usageTokens) ? usageTokens : void 0;
  const error = {
    code: ErrorCode.UNSUPPORTED_BINARY_CONTENT,
    message: "Jina Reader returned empty content for this URL",
    retryable: false
  };
  return {
    content: "Jina returned an empty body",
    statusCode,
    credits: 0,
    usageTokens: reportedTokens,
    error
  };
}
|
|
249
|
+
/**
 * Map a non-retryable HTTP status from Jina Reader to a final error response.
 *
 * Statuses in the 4xx range other than 429 are terminal: 401 maps to
 * AUTH_ERROR, 403 to QUOTA_EXCEEDED, 404 to NOT_FOUND, and any other to
 * INVALID_INPUT. Every other status returns null.
 *
 * @param {number} statusCode - HTTP status of the Reader response.
 * @param {string} content - Response text; at most its first 200 characters
 *   are quoted in the returned content.
 * @returns {object|null} Terminal response, or null when the status is not terminal.
 */
function terminalReaderResponse(statusCode, content) {
  const isClientError = statusCode >= 400 && statusCode < 500;
  if (!isClientError || statusCode === 429) return null;

  // Every terminal response shares this shape; only text, code and message vary.
  const terminal = (text, code, message) => ({
    content: text,
    statusCode,
    credits: 0,
    error: { code, message, retryable: false, statusCode }
  });

  switch (statusCode) {
    case 401:
      return terminal(
        `Jina auth/quota error (${statusCode}): ${content.slice(0, 200)}`,
        ErrorCode.AUTH_ERROR,
        "Jina Reader auth failed \u2014 check JINA_API_KEY"
      );
    case 403:
      return terminal(
        `Jina auth/quota error (${statusCode}): ${content.slice(0, 200)}`,
        ErrorCode.QUOTA_EXCEEDED,
        "Jina Reader quota exceeded"
      );
    case 404:
      return terminal(
        "Jina could not fetch the target URL (404)",
        ErrorCode.NOT_FOUND,
        "Target URL not reachable by Jina Reader"
      );
    default:
      return terminal(
        `Jina Reader error (${statusCode}): ${content.slice(0, 200)}`,
        ErrorCode.INVALID_INPUT,
        `Jina Reader returned ${statusCode}`
      );
  }
}
|
|
291
|
+
/**
 * Report whether a Reader error is final, i.e. non-retryable with one of the
 * codes AUTH_ERROR, QUOTA_EXCEEDED, NOT_FOUND or INVALID_INPUT.
 *
 * @param {{ retryable: boolean, code: string }} error - Structured error from a Reader response.
 * @returns {boolean} True when the error is non-retryable and has one of those codes.
 */
function isTerminalReaderError(error) {
  if (error.retryable) return false;
  switch (error.code) {
    case ErrorCode.AUTH_ERROR:
    case ErrorCode.QUOTA_EXCEEDED:
    case ErrorCode.NOT_FOUND:
    case ErrorCode.INVALID_INPUT:
      return true;
    default:
      return false;
  }
}
|
|
294
|
+
/**
 * Turn a Jina Search response body into normalized result items.
 *
 * A JSON body contributes its `data` value; a body that is not valid JSON is
 * scanned for markdown links instead. Anything that is not an array yields no
 * results. Items that fail normalization are dropped, and the list is capped
 * at SEARCH_RESULTS_PER_QUERY entries.
 *
 * @param {string} raw - Response body text.
 * @returns {object[]} Normalized search results.
 */
function parseSearchResults(raw) {
  let payload;
  try {
    payload = readUnknown(JSON.parse(raw), "data");
  } catch {
    payload = parseMarkdownSearchResults(raw);
  }
  if (!Array.isArray(payload)) return [];

  const normalized = [];
  payload.forEach((entry, idx) => {
    // The index passed here is the entry's place in the unfiltered payload.
    const result = normalizeSearchItem(entry, idx);
    if (result !== null) normalized.push(result);
  });
  return normalized.slice(0, SEARCH_RESULTS_PER_QUERY);
}
|
|
305
|
+
/**
 * Convert one raw search entry into the normalized result shape.
 *
 * @param {unknown} item - Raw entry; string fields are read defensively.
 * @param {number} index - Zero-based place of the entry in the raw list.
 * @returns {{ title: string, link: string, snippet: string, date: (string|undefined), position: number }|null}
 *   The normalized result, or null when the entry has no usable `url`/`link`.
 */
function normalizeSearchItem(item, index) {
  // `||` rather than `??`: an empty-string `url` must still fall back to
  // `link`, matching the fallback chains used for title and snippet below.
  const link = readString(item, "url") || readString(item, "link");
  if (!link) return null;
  const snippet =
    readString(item, "snippet") ||
    readString(item, "description") ||
    readString(item, "content") ||
    "";
  return {
    title: readString(item, "title") || link,
    link,
    snippet: snippet.slice(0, 500),
    // Same reasoning as `link`: an empty `date` should not mask `publishedTime`.
    date: readString(item, "date") || readString(item, "publishedTime"),
    position: index + 1
  };
}
|
|
316
|
+
/**
 * Collect `[title](http(s)://url)` markdown links from a text body.
 *
 * Scanning stops once SEARCH_RESULTS_PER_QUERY links have been gathered.
 *
 * @param {string} raw - Text to scan.
 * @returns {{ title: string, url: string }[]} Links in order of appearance.
 */
function parseMarkdownSearchResults(raw) {
  const found = [];
  const linkPattern = /\[([^\]]+)\]\((https?:\/\/[^)]+)\)/g;
  for (const [, title, url] of String(raw).matchAll(linkPattern)) {
    if (found.length >= SEARCH_RESULTS_PER_QUERY) break;
    if (title && url) found.push({ title, url });
  }
  return found;
}
|
|
327
|
+
/**
 * Check whether a value is a non-null object (arrays included).
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for any object other than null.
 */
function isRecord(value) {
  if (value === null) return false;
  return typeof value === "object";
}
|
|
330
|
+
/**
 * Read a property from a value without assuming the value is an object.
 *
 * @param {unknown} value - Candidate container.
 * @param {string} key - Property name to read.
 * @returns {unknown} `value[key]` when `value` passes `isRecord`, otherwise undefined.
 */
function readUnknown(value, key) {
  if (!isRecord(value)) return void 0;
  return value[key];
}
|
|
333
|
+
/**
 * Read a property and return it only if it is itself a non-null object.
 *
 * @param {unknown} value - Candidate container.
 * @param {string} key - Property name to read.
 * @returns {object|undefined} The nested object, or undefined when absent or not an object.
 */
function readRecord(value, key) {
  const candidate = readUnknown(value, key);
  if (isRecord(candidate)) return candidate;
  return void 0;
}
|
|
337
|
+
/**
 * Read a property and return it only if it is a string.
 *
 * @param {unknown} value - Candidate container.
 * @param {string} key - Property name to read.
 * @returns {string|undefined} The string value (possibly empty), or undefined otherwise.
 */
function readString(value, key) {
  const candidate = readUnknown(value, key);
  if (typeof candidate === "string") return candidate;
  return void 0;
}
|
|
162
341
|
export {
|
|
163
|
-
JinaClient
|
|
342
|
+
JinaClient,
|
|
343
|
+
buildJinaSearchUrl,
|
|
344
|
+
buildScrapeDoProxyUrl,
|
|
345
|
+
isTerminalReaderError
|
|
164
346
|
};
|
|
165
347
|
//# sourceMappingURL=jina.js.map
|