mcp-researchpowerpack 6.0.7 → 6.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-use.json +2 -2
- package/dist/src/schemas/scrape-links.js +2 -2
- package/dist/src/schemas/scrape-links.js.map +2 -2
- package/dist/src/services/llm-processor.js +3 -7
- package/dist/src/services/llm-processor.js.map +2 -2
- package/dist/src/tools/scrape.js +39 -6
- package/dist/src/tools/scrape.js.map +2 -2
- package/package.json +1 -1
package/dist/mcp-use.json
CHANGED

package/dist/src/schemas/scrape-links.js
CHANGED
@@ -5,8 +5,8 @@ const urlSchema = z.string().url({ message: "scrape-links: Invalid URL format" }
 ).describe("A fully-qualified HTTP or HTTPS URL to scrape.");
 const scrapeLinksParamsSchema = z.object({
   urls: z.array(urlSchema).min(1, { message: "scrape-links: At least 1 URL required" }).describe("URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch."),
-  extract: z.string().min(1, { message: "scrape-links: extract cannot be empty" }).describe(
-    '
+  extract: z.string().min(1, { message: "scrape-links: extract cannot be empty" }).optional().describe(
+    'OPTIONAL semantic extraction instruction. Describe the SHAPE of what you want, separated by `|`. When provided, the extractor classifies each page (docs / github-thread / reddit / marketing / cve / paper / announcement / qa / blog / changelog / release-notes) and adjusts emphasis per type: preserves numbers/versions/stacktraces verbatim from docs and CVE pages, quotes Reddit/HN with attribution plus sentiment distribution, flags what the page did NOT answer in a "Not found" section, and surfaces referenced-but-unscraped URLs in a "Follow-up signals" bulletin that feeds the next research loop. Good examples: "root cause | affected versions | fix | workarounds | timeline"; "pricing tiers | rate limits | enterprise contact | free-tier quotas"; "maintainer decisions | accepted fix commits | stacktraces | resolved version". Omit this argument to skip LLM extraction entirely and receive cleaned markdown for each URL (raw mode \u2014 cheaper, faster, and useful when you want the whole page rather than a filtered view).'
   )
 }).strict();
 const scrapeLinksOutputSchema = z.object({
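The functional change in this file is the added `.optional()` on `extract`: callers can now omit the instruction to get raw cleaned markdown instead of LLM extraction. A minimal sketch of the two call shapes the 6.0.9 schema accepts, assuming zod and eliding the long `describe()` strings:

```ts
import { z } from "zod";

// Re-creation of the changed parameter schema; describe() strings elided.
const urlSchema = z.string().url();
const scrapeLinksParamsSchema = z.object({
  urls: z.array(urlSchema).min(1),
  extract: z.string().min(1).optional(), // 6.0.9: .optional() added
}).strict();

// Extraction mode: the LLM filters each page against the instruction.
scrapeLinksParamsSchema.parse({
  urls: ["https://example.com/changelog"],
  extract: "root cause | affected versions | fix | workarounds | timeline",
});

// Raw mode: omitting `extract` now validates; under 6.0.7 this threw,
// since `extract` was required. The tool returns cleaned markdown per URL.
scrapeLinksParamsSchema.parse({ urls: ["https://example.com/changelog"] });
```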
package/dist/src/schemas/scrape-links.js.map
CHANGED

@@ -1,7 +1,7 @@
{
"version": 3,
"sources": ["../../../src/schemas/scrape-links.ts"],
-
"sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .describe('URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch.'),\n extract: z\n .string()\n .min(1, { message: 'scrape-links: extract cannot be empty' })\n .describe(\n '
-
"mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,icAA4b;AAAA,EACxc,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA,EAI9C,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
+
"sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .describe('URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch.'),\n extract: z\n .string()\n .min(1, { message: 'scrape-links: extract cannot be empty' })\n .optional()\n .describe(\n 'OPTIONAL semantic extraction instruction. Describe the SHAPE of what you want, separated by `|`. When provided, the extractor classifies each page (docs / github-thread / reddit / marketing / cve / paper / announcement / qa / blog / changelog / release-notes) and adjusts emphasis per type: preserves numbers/versions/stacktraces verbatim from docs and CVE pages, quotes Reddit/HN with attribution plus sentiment distribution, flags what the page did NOT answer in a \"Not found\" section, and surfaces referenced-but-unscraped URLs in a \"Follow-up signals\" bulletin that feeds the next research loop. Good examples: \"root cause | affected versions | fix | workarounds | timeline\"; \"pricing tiers | rate limits | enterprise contact | free-tier quotas\"; \"maintainer decisions | accepted fix commits | stacktraces | resolved version\". Omit this argument to skip LLM extraction entirely and receive cleaned markdown for each URL (raw mode \u2014 cheaper, faster, and useful when you want the whole page rather than a filtered view).',\n ),\n}).strict();\n\nexport type ScrapeLinksParams = z.infer<typeof scrapeLinksParamsSchema>;\n\nexport const scrapeLinksOutputSchema = z.object({\n // `content` deliberately NOT duplicated here \u2014 the primary markdown lives in\n // the MCP tool result's `content[0].text`. Previously this schema echoed the\n // whole extraction output, doubling token cost for clients that forward both.\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('URLs fetched successfully.'),\n failed: z.number().int().nonnegative().describe('URLs that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n total_credits: z.number().int().nonnegative().describe('External scraping credits consumed.'),\n }).strict(),\n}).strict();\n\nexport type ScrapeLinksOutput = z.infer<typeof scrapeLinksOutputSchema>;\n"],
+
"mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,icAA4b;AAAA,EACxc,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,EACT;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA,EAI9C,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
"names": []
}
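The new `sourcesContent` above also documents a shape change in the tool's output: the primary markdown lives only in the MCP result's `content[0].text`, while the structured output now carries just `metadata` (the schema comment notes the previous version echoed the whole extraction, doubling token cost for clients that forward both). A sketch of a consumer reading both halves; the result shape is the usual MCP CallToolResult, which is an assumption here, and the helper name is illustrative:

```ts
// Field names mirror scrapeLinksOutputSchema in the map above.
interface ScrapeLinksMetadata {
  total_items: number;        // Number of URLs processed.
  successful: number;         // URLs fetched successfully.
  failed: number;             // URLs that failed.
  execution_time_ms: number;  // Wall clock time in milliseconds.
  total_credits: number;      // External scraping credits consumed.
}

function readScrapeResult(result: {
  content: Array<{ type: string; text?: string }>;
  structuredContent?: { metadata: ScrapeLinksMetadata };
}): { markdown: string; metadata?: ScrapeLinksMetadata } {
  // Primary payload: the extracted (or raw-mode cleaned) markdown.
  const markdown = result.content[0]?.text ?? "";
  // The structured half no longer duplicates the markdown, so forwarding
  // both to a model does not double token cost.
  return { markdown, metadata: result.structuredContent?.metadata };
}
```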
package/dist/src/services/llm-processor.js
CHANGED

@@ -291,13 +291,9 @@ RULES:
 - Preserve code blocks, command examples, tables exactly.
 - Do NOT add commentary or recommendations outside "Follow-up signals".
 - Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.
-- Content clearly failed to load: return ONLY a single line, choosing from:
- \`## Matches\\n_Page did not load: 404_\`
- \`## Matches\\n_Page did not load: login-wall_\`
- \`## Matches\\n_Page did not load: paywall_\`
- \`## Matches\\n_Page did not load: JS-render-empty_\`
- \`## Matches\\n_Page did not load: non-text-asset_\`
- \`## Matches\\n_Page did not load: truncated-before-relevant-section_\`
+- Page appears gated (login wall, paywall, JS-render-empty shell) or near-empty: BEFORE dismissing the page, look for ANY visible text \u2014 og:title, og:description, meta description, headline, author name, nav labels, teaser/preview sentences, visible comment snippets. If ANY such text exists, extract it as usual under \`## Source\` + \`## Matches\`, and list the blocked facets under \`## Not found\`. Prefix the first \`## Matches\` bullet with \`**[partial \u2014 <reason>]**\` so the caller knows the body is gated (reasons: \`login-wall | paywall | JS-render-empty | truncated-before-relevant-section\`). ONLY when there is NO visible extractable text at all (< 50 words AND no og:* AND no headline AND no preview), return exactly one line:
+ \`## Matches\\n_Page did not load: <reason>_\`
+ Valid reasons: \`404 | login-wall | paywall | JS-render-empty | non-text-asset | truncated-before-relevant-section\`.
 
 Content:
 ${truncatedContent}` : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \`[text](url)\`. Do NOT summarize \u2014 preserve the full body.
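The removed rule forced an all-or-nothing contract: any gated page collapsed to a single `_Page did not load_` line. The replacement keeps partially visible metadata (og:* tags, headlines, previews) and reserves the one-line sentinel for genuinely empty pages, with the reason vocabulary made explicit. A caller can branch on the two markers; here is a hypothetical consumer-side check, assuming the extractor emits the markers verbatim and renders the `\n` as a real newline:

```ts
// Marker strings come from the prompt rules above; this helper is a
// hypothetical consumer, not part of the package.
type ExtractionStatus =
  | { kind: "ok" }
  | { kind: "partial"; reason: string }  // gated body, metadata still extracted
  | { kind: "failed"; reason: string };  // no visible text at all

function classifyExtraction(markdown: string): ExtractionStatus {
  // Total failure: the whole payload is the single sentinel line.
  const failed = markdown.trim().match(/^## Matches\s*\n\s*_Page did not load: (.+)_$/);
  if (failed) return { kind: "failed", reason: failed[1] };

  // Partial: the first Matches bullet carries the **[partial \u2014 <reason>]** prefix.
  const partial = markdown.match(/\*\*\[partial \u2014 ([^\]]+)\]\*\*/);
  if (partial) return { kind: "partial", reason: partial[1] };

  return { kind: "ok" };
}
```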
package/dist/src/services/llm-processor.js.map
CHANGED

@@ -1,7 +1,7 @@
{
"version": 3,
"sources": ["../../../src/services/llm-processor.ts"],
-
"sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses any OpenAI-compatible endpoint. Reasoning effort is always 'low'.\n * Primary model exhausts its retries first; fallback model (LLM_FALLBACK_MODEL) then\n * gets up to FALLBACK_RETRY_COUNT additional attempts before the call fails.\n * NEVER throws \u2014 always returns a valid result.\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~125k tokens, sized for the larger fallback model) */\nconst MAX_LLM_INPUT_CHARS = 500_000 as const;\n\n/**\n * Maximum input characters for the primary model when it has a smaller context window.\n * Used when an input would exceed the mini model's limits so the call goes straight to fallback\n * instead of burning retries on guaranteed context_length_exceeded errors.\n */\nconst MAX_PRIMARY_MODEL_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 600_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 75_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 150_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. */\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 
'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. */\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n/** Number of additional attempts using the fallback model after primary exhausts. 
*/\nconst FALLBACK_RETRY_COUNT = 3 as const;\n\n// OpenAI-compatible retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n return {\n model,\n messages: [{ role: 'user', content: prompt }],\n reasoning_effort: 'low',\n };\n}\n\nexport async function requestText(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n modelOverride?: string,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = modelOverride || LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Single LLM call with automatic fallback model.\n * Tries the primary model once; if it fails and LLM_FALLBACK_MODEL is set,\n * retries up to FALLBACK_RETRY_COUNT times on the fallback model.\n * Used for single-shot calls (classify, brief, refine queries).\n */\nexport async function requestTextWithFallback(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const primary = await requestText(processor, prompt, operationLabel, signal);\n if (primary.content) return primary;\n\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (!fallbackModel) return primary;\n\n mcpLog('warning', `Primary model failed, switching to fallback ${fallbackModel}`, 'llm');\n\n let lastError = primary.error;\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n const result = await requestText(processor, prompt, `${operationLabel} [fallback]`, signal, fallbackModel);\n if (result.content) return result;\n lastError = result.error;\n }\n\n return { content: null, model: fallbackModel, error: lastError };\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if (error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from the OpenAI-compatible endpoint\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? 
record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Detect \"the prompt is too long for this model\" errors.\n * These are NOT retryable on the same model \u2014 we should skip remaining primary retries\n * and go straight to the fallback model (which has a larger context window).\n */\nfunction isContextWindowError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n const record = error as Record<string, unknown>;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nestedCode = nested && typeof nested.code === 'string' ? nested.code : undefined;\n if (code === 'context_length_exceeded' || nestedCode === 'context_length_exceeded') {\n return true;\n }\n\n const messages: string[] = [];\n if (typeof record.message === 'string') messages.push(record.message);\n if (nested && typeof nested.message === 'string') messages.push(nested.message);\n const combined = messages.join(' ').toLowerCase();\n return (\n combined.includes('context length') ||\n combined.includes('context window') ||\n combined.includes('maximum context') ||\n combined.includes('maximum tokens') ||\n combined.includes('token limit') ||\n combined.includes('too many tokens') ||\n combined.includes('prompt is too long') ||\n combined.includes('reduce the length')\n );\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL must all be set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to avoid blowing past even the fallback model's context.\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // If the prompt would exceed the primary (mini) model's smaller context window,\n // skip it entirely and go straight to the fallback model. 
Saves burning retries\n // on guaranteed context_length_exceeded errors.\n const skipPrimaryForSize =\n truncatedContent.length > MAX_PRIMARY_MODEL_INPUT_CHARS && !!LLM_EXTRACTION.FALLBACK_MODEL;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. 
Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) \"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Content clearly failed to load: return ONLY a single line, choosing from:\n \\`## Matches\\\\n_Page did not load: 404_\\`\n \\`## Matches\\\\n_Page did not load: login-wall_\\`\n \\`## Matches\\\\n_Page did not load: paywall_\\`\n \\`## Matches\\\\n_Page did not load: JS-render-empty_\\`\n \\`## Matches\\\\n_Page did not load: non-text-asset_\\`\n \\`## Matches\\\\n_Page did not load: truncated-before-relevant-section_\\`\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. 
Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Phase 1: primary model with up to LLM_RETRY_CONFIG.maxRetries retries.\n // Skip entirely when the input is too big for the primary's context window.\n if (skipPrimaryForSize) {\n mcpLog(\n 'info',\n `Input ${truncatedContent.length} chars exceeds primary model cap (${MAX_PRIMARY_MODEL_INPUT_CHARS}); routing directly to fallback`,\n 'llm',\n );\n } else {\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(processor, prompt, 'LLM extraction', signal);\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response \u2014 not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? String((err as Record<string, unknown>).code)\n : undefined;\n const ctxErr = isContextWindowError(err);\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}, context_window=${ctxErr}]`, 'llm');\n\n // Context window errors are not retryable on the same model \u2014 jump to fallback.\n if (ctxErr) {\n mcpLog('warning', 'Context window exceeded on primary \u2014 skipping remaining retries, routing to fallback', 'llm');\n break;\n }\n\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n break;\n }\n }\n }\n\n // Phase 2: fallback model \u2014 FALLBACK_RETRY_COUNT attempts before giving up\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (fallbackModel) {\n mcpLog('warning', `Primary exhausted, switching to fallback ${fallbackModel}`, 'llm');\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n try {\n const response = await requestText(processor, prompt, 'LLM extraction [fallback]', signal, fallbackModel);\n if (response.content) {\n mcpLog('info', `Fallback extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n mcpLog('warning', 'Fallback returned empty response', 'llm');\n break;\n } catch (err: unknown) {\n lastError = classifyError(err);\n mcpLog('error', `Fallback error (attempt ${attempt + 1}): ${lastError.message}`, 
'llm');\n }\n }\n }\n\n const errorMessage = lastError?.message || 'Unknown LLM error';\n mcpLog('error', `All attempts failed: ${errorMessage}. Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content,\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? 
previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. 
For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly string[];\n}\n\nconst VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? 
parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n- For \\`site:<domain>\\` filters, ONLY use domains you are highly confident are real. Safe choices: \\`github.com\\`, \\`stackoverflow.com\\`, \\`reddit.com\\`, \\`news.ycombinator.com\\`, \\`arxiv.org\\`, \\`nvd.nist.gov\\`, \\`pypi.org\\`, \\`npmjs.com\\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names \"Cursor\" \u2192 \\`cursor.com\\`/\\`docs.cursor.com\\` is acceptable). 
If you don't know the product's real docs domain, leave the query open (no \\`site:\\`) instead of guessing.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. \\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
-
"mappings": "AAQA,OAAO,YAAY;AACnB,SAAS,gBAAgB,uBAAuB;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,cAAc;AAGvB,MAAM,sBAAsB;AAO5B,MAAM,gCAAgC;AAGtC,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB;AAG9B,MAAM,uBAAuB;AAG7B,MAAM,0BAA0B;AAuBhC,MAAM,YAAY;AAAA,EAChB,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,sBAAsB;AAAA,EACtB,wBAAwB;AAAA,EACxB,kBAAkB;AAAA,EAClB,oBAAoB;AAAA,EACpB,4BAA4B;AAAA,EAC5B,8BAA8B;AAChC;AAEO,SAAS,eAAe,MAA2B;AACxD,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,6BAA6B;AAAA,EACzC,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,+BAA+B;AAAA,EAC3C;AACF;AAEO,SAAS,eAAe,MAAqB,KAAoB;AACtE,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,OAAO,eAAe;AAClF,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,8BAA8B;AAAA,EAC1C,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,gCAAgC;AAAA,EAC5C;AACF;AAEO,SAAS,eAAkC;AAChD,QAAM,MAAM,gBAAgB;AAC5B,SAAO;AAAA,IACL,eAAe,UAAU;AAAA,IACzB,iBAAiB,UAAU;AAAA,IAC3B,sBAAsB,UAAU;AAAA,IAChC,wBAAwB,UAAU;AAAA,IAClC,kBAAkB,UAAU;AAAA,IAC5B,oBAAoB,UAAU;AAAA;AAAA;AAAA,IAG9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,4BAA4B,UAAU;AAAA,IACtC,8BAA8B,UAAU;AAAA,EAC1C;AACF;AAGO,SAAS,0BAAgC;AAC9C,YAAU,gBAAgB;AAC1B,YAAU,kBAAkB;AAC5B,YAAU,uBAAuB;AACjC,YAAU,yBAAyB;AACnC,YAAU,mBAAmB;AAC7B,YAAU,qBAAqB;AAC/B,YAAU,6BAA6B;AACvC,YAAU,+BAA+B;AAC3C;AAgBA,MAAM,mBAAmB;AAAA,EACvB,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,YAAY;AACd;AAGA,MAAM,uBAAuB;AAG7B,MAAM,4BAA4B,oBAAI,IAAI;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGD,SAAS,UAAU,OAA6C;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,YAAY,SACZ,OAAQ,MAAkC,WAAW;AAEzD;AAEA,IAAI,YAA2B;AAIxB,SAAS,qBAAoC;AAClD,MAAI,CAAC,gBAAgB,EAAE,cAAe,QAAO;AAE7C,MAAI,CAAC,WAAW;AACd,gBAAY,IAAI,OAAO;AAAA,MACrB,SAAS,eAAe;AAAA,MACxB,QAAQ,eAAe;AAAA,MACvB,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,gBAAgB,EAAE,WAAW,yBAAyB;AAAA,IACxD,CAAC;AACD,WAAO,QAAQ,qCAAqC,eAAe,KAAK,cAAc,eAAe,QAAQ,KAAK,KAAK;AAAA,EACzH;AACA,SAAO;AACT;AAEA,SAAS,qBAAqB,OAAe,QAAyC;AACpF,SAAO;AAAA,IACL;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,IAC5C,kBAAkB;AAAA,EACpB;AACF;AAEA,eAAsB,YACpB,WACA,QACA,gBACA,QACA,eACoE;AACpE,QAAM,QAAQ,iBAAiB,eAAe;AAE9C,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB,CAAC,gBAAgB,UAAU,KAAK,YAAY;AAAA,QAC1C,qBAAqB,OAAO,MAAM;AAAA,QAClC;AAAA,UACE,QAAQ,SAAS,YAAY,IAAI,CAAC,aAAa,MAAM,CAAC,IAAI;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG,cAAc,KAAK,KAAK;AAAA,IAC7B;AAEA,UAAM,UAAU,SAAS,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK;AAC9D,QAAI,SAAS;AACX,aAAO,EAAE,SAAS,MAAM;AAAA,IAC1B;AAEA,UAAM,MAAM,6BAA6B,KAAK;AAC9C,WAAO,WAAW,GAAG,cAAc,qCAAqC,KAAK,IAAI,KAAK;AACtF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,IAAI;AAAA,EAC5C,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,GAAG,cAAc,qBAAqB,KAAK,KAAK,OAAO,IAAI,KAAK;AAClF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,QAAQ;AAAA,EAChD;AACF;AAQA,eAAsB,wBACpB,WACA,QACA,gBACA,QACoE;AACpE,QAAM,UAAU,MAAM,YAAY,WAAW,QAAQ,gBAAgB,MAAM;AAC3E,MAAI,QAAQ,QAAS,QAAO;AAE5B,QAAM,gBAAgB,eAAe;AACrC,MAAI,CAAC,cAAe,QAAO;AAE3B,SAAO,WAAW,+CAA+C,aAAa,IAAI,KAAK;AAEvF,MAAI,YAAY,QAAQ;AACxB,WAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,QAAI,UAAU,GAAG;AACf,YAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,aAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,UAAI;AAAE,cAAM,MAAM,SAAS,MAAM;AAAA,MAAG,QAAQ;AAAE;AAAA,MAAO;AAAA,IACvD;AACA,UAAM,SAAS,MAAM,YAAY,WAAW,QAAQ,GAAG,cAAc,eAAe,QAAQ,aAAa;AACzG,QAAI,OAAO,QAAS,QAAO;AAC3B,gBAAY,OAAO;AAAA,EACrB;AAEA,SAAO,EAAE,SAAS,MAAM,OAAO,eAAe,OAAO,UAAU;AACjE;AAKA,SAAS,oBAAoB,OAAyB;AACpD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO
;AAGhD,QAAM,YAAa,OAA6B;AAChD,MAAI,cAAc,cAAc,cAAc,aAAa;AACzD,WAAO;AAAA,EACT;AAGA,MAAI,UAAU,KAAK,GAAG;AACpB,QAAI,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,KAAK;AACxH,aAAO;AAAA,IACT;AAAA,EACF;AAGA,QAAM,SAAS;AACf,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AACN,QAAM,YACJ,SACC,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO,YAC1D,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,MAAI,aAAa,0BAA0B,IAAI,SAAS,GAAG;AACzD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,OAAO,OAAO,YAAY,WAAW,OAAO,QAAQ,YAAY,IAAI;AACpF,MACE,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,SAAS,KAC1B,QAAQ,SAAS,WAAW,KAC5B,QAAQ,SAAS,qBAAqB,KACtC,QAAQ,SAAS,cAAc,KAC/B,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,YAAY,GAC7B;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAOA,SAAS,qBAAqB,OAAyB;AACrD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAEhD,QAAM,SAAS;AACf,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AAEN,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,aAAa,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7E,MAAI,SAAS,6BAA6B,eAAe,2BAA2B;AAClF,WAAO;AAAA,EACT;AAEA,QAAM,WAAqB,CAAC;AAC5B,MAAI,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AACpE,MAAI,UAAU,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AAC9E,QAAM,WAAW,SAAS,KAAK,GAAG,EAAE,YAAY;AAChD,SACE,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,aAAa,KAC/B,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,oBAAoB,KACtC,SAAS,SAAS,mBAAmB;AAEzC;AAKA,SAAS,oBAAoB,SAAyB;AACpD,QAAM,mBAAmB,iBAAiB,cAAc,KAAK,IAAI,GAAG,OAAO;AAC3E,QAAM,SAAS,KAAK,OAAO,IAAI,wBAAwB;AACvD,SAAO,KAAK,IAAI,mBAAmB,QAAQ,iBAAiB,UAAU;AACxE;AAOA,eAAsB,sBACpB,SACA,QACA,WACA,QACoB;AAEpB,MAAI,CAAC,OAAO,SAAS;AACnB,WAAO,EAAE,SAAS,WAAW,MAAM;AAAA,EACrC;AAEA,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL;AAAA,MACA,WAAW;AAAA,MACX,OAAO;AAAA,MACP,cAAc;AAAA,QACZ,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO,EAAE,SAAS,WAAW,IAAI,WAAW,OAAO,OAAO,yBAAyB;AAAA,EACrF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,sBACtC,QAAQ,UAAU,GAAG,mBAAmB,IAAI,0CAC5C;AAKJ,QAAM,qBACJ,iBAAiB,SAAS,iCAAiC,CAAC,CAAC,eAAe;AAK9E,QAAM,WAAW,MAAM;AACrB,QAAI,CAAC,OAAO,IAAK,QAAO;AACxB,QAAI;AACF,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,aAAO,GAAG,EAAE,MAAM,GAAG,EAAE,QAAQ;AAAA,IACjC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF,GAAG;AACH,QAAM,UAAU,UAAU,aAAa,OAAO;AAAA;AAAA,IAAS;AAEvD,QAAM,SAAS,OAAO,UAClB;AAAA;AAAA,EAEJ,OAAO,2BAA2B,OAAO,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAsDhD,gBAAgB,KACZ;AAAA;AAAA,EAEJ,OAAO;AAAA,EACP,gBAAgB;AAEhB,MAAI;AAIJ,MAAI,oBAAoB;AACtB;AAAA,MACE;AAAA,MACA,SAAS,iBAAiB,MAAM,qCAAqC,6BAA6B;AAAA,MAClG;AAAA,IACF;AAAA,EACF,OAAO;AACL,aAAS,UAAU,GAAG,WAAW,iBAAiB,YAAY,WAAW;AACvE,UAAI;AACF,YAAI,YAAY,GAAG;AACjB,iBAAO,QAAQ,4BAA4B,eAAe,KAAK,IAAI,KAAK;AAAA,QAC1E,OAAO;AACL,iBAAO,WAAW,iBAAiB,OAAO,IAAI,iBAAiB,UAAU,IAAI,KAAK;AAAA,QACpF;AAEA,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,kBAAkB,MAAM;AAE9E,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,0BAA0B,SAAS,QAAQ,MAAM,eAAe,KAAK;AACpF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,QACtD;AAGA,eAAO,WAAW,oCAAoC,KAAK;AAC3D,uBAAe,aAAa,6BAA6B;AACzD,eAAO;AAAA,UACL;AAAA,UACA,WAAW;AAAA,UACX,OAAO;AAAA,UACP,cAAc;AAAA,YACZ,MAAM,UAAU;AAAA,YAChB,SAAS;AAAA,YACT,WAAW;AAAA,UACb;AAAA,QACF;AAAA,MAEF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,cAAM,SAAS,UAAU,GAAG,IAAI,IAAI,SAAS;AAC7C,cAAM,OAAO,OAAO,QAAQ,YAAY,QAAQ,QAAQ,UAAU,MAC9D,OAAQ,IAAgC,IAAI,IAC5C;AACJ,cAAM,SAAS,qBAAqB,GAAG;AACvC
,eAAO,SAAS,kBAAkB,UAAU,CAAC,MAAM,UAAU,OAAO,YAAY,MAAM,UAAU,IAAI,eAAe,oBAAoB,GAAG,CAAC,oBAAoB,MAAM,KAAK,KAAK;AAG/K,YAAI,QAAQ;AACV,iBAAO,WAAW,6FAAwF,KAAK;AAC/G;AAAA,QACF;AAEA,YAAI,oBAAoB,GAAG,KAAK,UAAU,iBAAiB,YAAY;AACrE,gBAAM,UAAU,oBAAoB,OAAO;AAC3C,iBAAO,WAAW,eAAe,OAAO,SAAS,KAAK;AACtD,cAAI;AAAE,kBAAM,MAAM,SAAS,MAAM;AAAA,UAAG,QAAQ;AAAE;AAAA,UAAO;AACrD;AAAA,QACF;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,gBAAgB,eAAe;AACrC,MAAI,eAAe;AACjB,WAAO,WAAW,4CAA4C,aAAa,IAAI,KAAK;AACpF,aAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,UAAI,UAAU,GAAG;AACf,cAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,eAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,YAAI;AAAE,gBAAM,MAAM,SAAS,MAAM;AAAA,QAAG,QAAQ;AAAE;AAAA,QAAO;AAAA,MACvD;AACA,UAAI;AACF,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,6BAA6B,QAAQ,aAAa;AACxG,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,sBAAsB,SAAS,QAAQ,MAAM,eAAe,KAAK;AAChF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,QACtD;AACA,eAAO,WAAW,oCAAoC,KAAK;AAC3D;AAAA,MACF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,eAAO,SAAS,2BAA2B,UAAU,CAAC,MAAM,UAAU,OAAO,IAAI,KAAK;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,eAAe,WAAW,WAAW;AAC3C,SAAO,SAAS,wBAAwB,YAAY,iCAAiC,KAAK;AAC1F,iBAAe,aAAa,YAAY;AAExC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,IACX,OAAO,0BAA0B,YAAY;AAAA,IAC7C,cAAc,aAAa;AAAA,MACzB,MAAM,UAAU;AAAA,MAChB,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAOA,MAAM,0BAA0B;AA2ChC,eAAsB,sBACpB,YAQA,WACA,cACA,WACA,kBAAqC,CAAC,GAC4B;AAClE,QAAM,iBAAiB,WAAW,MAAM,GAAG,uBAAuB;AAMlE,QAAM,iBAAiB,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AACxD,QAAM,gBAAgB,CAAC,SAAyB,eAAe,OAAO,CAAC,KAAK;AAG5E,QAAM,QAAkB,CAAC;AACzB,aAAW,OAAO,gBAAgB;AAChC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,UAAM,UAAU,IAAI,QAAQ,SAAS,MACjC,IAAI,QAAQ,MAAM,GAAG,GAAG,IAAI,QAC5B,IAAI;AACR,UAAM,KAAK,IAAI,IAAI,IAAI,OAAO,cAAc,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,WAAM,MAAM,WAAM,OAAO,EAAE;AAAA,EAC/F;AAEA,QAAM,mBAAmB,gBAAgB,SAAS,IAC9C,gBAAgB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC9C;AACJ,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA,aAEJ,SAAS;AAAA,SACb,KAAK;AAAA;AAAA;AAAA,EAGZ,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iBAmDD,eAAe,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAMpB,eAAe,MAAM,cAAc,YAAY;AAAA,EAC/D,MAAM,KAAK,IAAI,CAAC;AAEhB,MAAI;AACF,WAAO,QAAQ,eAAe,eAAe,MAAM,2BAA2B,KAAK;AAEnF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAGA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAKjC,QAAI,CAAC,OAAO,SAAS,OAAO,OAAO,cAAc,YAAY,CAAC,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC3F,YAAM,SAAS;AACf,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAEA,WAAO,QAAQ,4BAA4B,OAAO,QAAQ,OAAO,OAAK,EAAE,SAAS,iBAAiB,EAAE,MAAM,oBAAoB,KAAK;AACnI,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,OAAO;AAAA,EAC1B,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,0BAA0B,OAAO,IAAI,KAAK;AAC1D,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,MAAM,OAAO,0BAA0B,OAAO,GAAG;AAAA,EACpE;AACF;AAEA,eAAsB,+BACpB,YAKA,WACA,iBACA,WAC8D;AAC9D,QAAM,kBAAkB,WAAW,MAAM,GAAG,EAAE;AAC9C,QAAM,QAAQ,gBAAgB,IAAI,CAAC,QAAQ;AACzC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,WAAO,IAAI,IAAI,I
AAI,KAAK,IAAI,KAAK,WAAM,MAAM;AAAA,EAC/C,CAAC;AAED,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aASJ,SAAS;AAAA;AAAA;AAAA,EAGpB,gBAAgB,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA,EAGvD,MAAM,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAShB,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,OAAO;AAAA,IACrC;AAEA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,MAAM,QAAQ,OAAO,cAAc,IAAI,OAAO,iBAAiB,CAAC,EAAE;AAAA,EACrF,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,4CAA4C,OAAO,IAAI,KAAK;AAC5E,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,QAAQ;AAAA,EACtC;AACF;AA0BA,MAAM,qBAAqB,oBAAI,IAAI;AAAA,EACjC;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAa;AAAA,EAAa;AAAA,EAAW;AAAA,EACpD;AAAA,EAAa;AAAA,EAAkB;AACjC,CAAC;AAED,MAAM,kBAAkB,oBAAI,IAAI,CAAC,QAAQ,SAAS,UAAU,OAAO,CAAC;AACpE,MAAM,iBAAiB,oBAAI,IAAmB,CAAC,UAAU,OAAO,MAAM,CAAC;AACvE,MAAM,mBAAmB,oBAAI,IAAI,CAAC,cAAc,cAAc,CAAC;AAE/D,SAAS,cAAc,OAAmC;AACxD,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM,OAAO,MAAM,QAAQ;AACzE;AAEA,SAAS,YAAY,OAA8C;AACjE,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM;AAChD,QAAI,OAAO,MAAM,YAAY,MAAM,KAAM,QAAO;AAChD,UAAM,OAAQ,EAA8B;AAC5C,UAAM,SAAU,EAA8B;AAC9C,WAAO,OAAO,SAAS,YAClB,iBAAiB,IAAI,IAAI,KACzB,OAAO,WAAW,YAClB,OAAO,KAAK,EAAE,SAAS;AAAA,EAC9B,CAAC;AACH;AAEO,SAAS,mBAAmB,KAAmC;AACpE,MAAI;AACF,UAAM,UAAU,IAAI,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACxF,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,UAAM,aAAa,OAAO,OAAO,eAAe,WAAW,OAAO,aAAa;AAC/E,QAAI,CAAC,cAAc,CAAC,mBAAmB,IAAI,UAAU,EAAG,QAAO;AAE/D,UAAM,mBAAmB,OAAO,OAAO,qBAAqB,WAAW,OAAO,mBAAmB;AACjG,QAAI,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,gBAAgB,EAAG,QAAO;AAExE,UAAM,iBAAiB,OAAO;AAC9B,QAAI,OAAO,mBAAmB,YAAY,CAAC,eAAe,IAAI,cAA+B,EAAG,QAAO;AAEvG,QAAI,CAAC,YAAY,OAAO,mBAAmB,KAAK,OAAO,oBAAoB,WAAW,EAAG,QAAO;AAChG,QAAI,CAAC,cAAc,OAAO,aAAa,KAAK,OAAO,cAAc,WAAW,EAAG,QAAO;AAEtF,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,OAAO,OAAO,sBAAsB,WAAW,OAAO,oBAAoB;AAAA,MAC7F;AAAA,MACA,uBAAuB,OAAO,OAAO,0BAA0B,WAAW,OAAO,wBAAwB;AAAA,MACzG;AAAA,MACA,qBAAqB,OAAO;AAAA,MAC5B,eAAe,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAAA,MACrE,iBAAiB,cAAc,OAAO,eAAe,IAAI,OAAO,kBAAkB,CAAC;AAAA,MACnF,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,MAC7E,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,IAC/E;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,sBACpB,MACA,WACA,QAC+B;AAC/B,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAOT,IAAI;AAAA,SACH,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA2CZ,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO,WAAW,kDAAkD,SAAS,SAAS,SAAS,IAAI,KAAK;AACxG,qBAAe,WAAW,SAAS,SAAS,gBAAgB;AAC5D,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,mBAAmB,SAAS,OAAO;AACjD,QAAI,CAAC,OAAO;AACV,aAAO,WAAW,wDAAwD,KAAK;AAC/E,qBAAe,WAAW,+BAA+B;AACzD,aAAO;AAAA,IACT;AAEA,mBAAe,SAAS;AACxB,WAAO;AAAA,EACT,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,qCAAqC,OAAO,IAAI,KAAK;AACvE,mBAAe,WAAW,OAAO;AACjC,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBAAoB,OAA8B;AAChE,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,MAAM,UAAU,aAAQ,MAAM,iBAAiB,EAAE;AACjF,QAAM,KAAK,yBAAyB,MAAM,cAAc,aAAQ,MAAM
,qBAAqB,EAAE;AAC7F,QAAM,KAAK,oBAAoB,MAAM,gBAAgB,IAAI;AACzD,QAAM,KAAK,EAAE;AAEb,MAAI,MAAM,oBAAoB,SAAS,GAAG;AACxC,UAAM,KAAK,yBAAyB;AACpC,UAAM,oBAAoB,QAAQ,CAAC,MAAM,MAAM;AAC7C,YAAM,KAAK,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,aAAQ,KAAK,MAAM,EAAE;AAAA,IAC1D,CAAC;AACD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,sBAAsB,MAAM,cAAc,MAAM,mFAA8E;AACzI,eAAW,QAAQ,MAAM,eAAe;AACtC,YAAM,KAAK,KAAK,IAAI,EAAE;AAAA,IACxB;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,gBAAgB,SAAS,GAAG;AACpC,UAAM,KAAK,sFAAuF;AAClG,eAAW,QAAQ,MAAM,gBAAiB,OAAM,KAAK,KAAK,IAAI,EAAE;AAChE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,OAAO,MAAM,cAAe,OAAM,KAAK,KAAK,GAAG,EAAE;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,KAAK,MAAM,cAAe,OAAM,KAAK,KAAK,CAAC,EAAE;AACxD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iLAAiL;AAE5L,SAAO,MAAM,KAAK,IAAI;AACxB;",
|
|
4
|
+
"sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses any OpenAI-compatible endpoint. Reasoning effort is always 'low'.\n * Primary model exhausts its retries first; fallback model (LLM_FALLBACK_MODEL) then\n * gets up to FALLBACK_RETRY_COUNT additional attempts before the call fails.\n * NEVER throws \u2014 always returns a valid result.\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~125k tokens, sized for the larger fallback model) */\nconst MAX_LLM_INPUT_CHARS = 500_000 as const;\n\n/**\n * Maximum input characters for the primary model when it has a smaller context window.\n * Used when an input would exceed the mini model's limits so the call goes straight to fallback\n * instead of burning retries on guaranteed context_length_exceeded errors.\n */\nconst MAX_PRIMARY_MODEL_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 600_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 75_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 150_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. */\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 
'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. */\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n/** Number of additional attempts using the fallback model after primary exhausts. 
*/\nconst FALLBACK_RETRY_COUNT = 3 as const;\n\n// OpenAI-compatible retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n return {\n model,\n messages: [{ role: 'user', content: prompt }],\n reasoning_effort: 'low',\n };\n}\n\nexport async function requestText(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n modelOverride?: string,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = modelOverride || LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Single LLM call with automatic fallback model.\n * Tries the primary model once; if it fails and LLM_FALLBACK_MODEL is set,\n * retries up to FALLBACK_RETRY_COUNT times on the fallback model.\n * Used for single-shot calls (classify, brief, refine queries).\n */\nexport async function requestTextWithFallback(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const primary = await requestText(processor, prompt, operationLabel, signal);\n if (primary.content) return primary;\n\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (!fallbackModel) return primary;\n\n mcpLog('warning', `Primary model failed, switching to fallback ${fallbackModel}`, 'llm');\n\n let lastError = primary.error;\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n const result = await requestText(processor, prompt, `${operationLabel} [fallback]`, signal, fallbackModel);\n if (result.content) return result;\n lastError = result.error;\n }\n\n return { content: null, model: fallbackModel, error: lastError };\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if (error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from the OpenAI-compatible endpoint\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? 
record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Detect \"the prompt is too long for this model\" errors.\n * These are NOT retryable on the same model \u2014 we should skip remaining primary retries\n * and go straight to the fallback model (which has a larger context window).\n */\nfunction isContextWindowError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n const record = error as Record<string, unknown>;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nestedCode = nested && typeof nested.code === 'string' ? nested.code : undefined;\n if (code === 'context_length_exceeded' || nestedCode === 'context_length_exceeded') {\n return true;\n }\n\n const messages: string[] = [];\n if (typeof record.message === 'string') messages.push(record.message);\n if (nested && typeof nested.message === 'string') messages.push(nested.message);\n const combined = messages.join(' ').toLowerCase();\n return (\n combined.includes('context length') ||\n combined.includes('context window') ||\n combined.includes('maximum context') ||\n combined.includes('maximum tokens') ||\n combined.includes('token limit') ||\n combined.includes('too many tokens') ||\n combined.includes('prompt is too long') ||\n combined.includes('reduce the length')\n );\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL must all be set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to avoid blowing past even the fallback model's context.\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // If the prompt would exceed the primary (mini) model's smaller context window,\n // skip it entirely and go straight to the fallback model. 
Saves burning retries\n // on guaranteed context_length_exceeded errors.\n const skipPrimaryForSize =\n truncatedContent.length > MAX_PRIMARY_MODEL_INPUT_CHARS && !!LLM_EXTRACTION.FALLBACK_MODEL;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. 
Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) \"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Page appears gated (login wall, paywall, JS-render-empty shell) or near-empty: BEFORE dismissing the page, look for ANY visible text \u2014 og:title, og:description, meta description, headline, author name, nav labels, teaser/preview sentences, visible comment snippets. If ANY such text exists, extract it as usual under \\`## Source\\` + \\`## Matches\\`, and list the blocked facets under \\`## Not found\\`. Prefix the first \\`## Matches\\` bullet with \\`**[partial \u2014 <reason>]**\\` so the caller knows the body is gated (reasons: \\`login-wall | paywall | JS-render-empty | truncated-before-relevant-section\\`). ONLY when there is NO visible extractable text at all (< 50 words AND no og:* AND no headline AND no preview), return exactly one line:\n \\`## Matches\\\\n_Page did not load: <reason>_\\`\n Valid reasons: \\`404 | login-wall | paywall | JS-render-empty | non-text-asset | truncated-before-relevant-section\\`.\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. 
Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Phase 1: primary model with up to LLM_RETRY_CONFIG.maxRetries retries.\n // Skip entirely when the input is too big for the primary's context window.\n if (skipPrimaryForSize) {\n mcpLog(\n 'info',\n `Input ${truncatedContent.length} chars exceeds primary model cap (${MAX_PRIMARY_MODEL_INPUT_CHARS}); routing directly to fallback`,\n 'llm',\n );\n } else {\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(processor, prompt, 'LLM extraction', signal);\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response \u2014 not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? String((err as Record<string, unknown>).code)\n : undefined;\n const ctxErr = isContextWindowError(err);\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}, context_window=${ctxErr}]`, 'llm');\n\n // Context window errors are not retryable on the same model \u2014 jump to fallback.\n if (ctxErr) {\n mcpLog('warning', 'Context window exceeded on primary \u2014 skipping remaining retries, routing to fallback', 'llm');\n break;\n }\n\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n break;\n }\n }\n }\n\n // Phase 2: fallback model \u2014 FALLBACK_RETRY_COUNT attempts before giving up\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (fallbackModel) {\n mcpLog('warning', `Primary exhausted, switching to fallback ${fallbackModel}`, 'llm');\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n try {\n const response = await requestText(processor, prompt, 'LLM extraction [fallback]', signal, fallbackModel);\n if (response.content) {\n mcpLog('info', `Fallback extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n mcpLog('warning', 'Fallback returned empty response', 'llm');\n break;\n } catch (err: unknown) {\n lastError = classifyError(err);\n mcpLog('error', `Fallback error (attempt ${attempt + 1}): ${lastError.message}`, 
'llm');\n }\n }\n }\n\n const errorMessage = lastError?.message || 'Unknown LLM error';\n mcpLog('error', `All attempts failed: ${errorMessage}. Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content,\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? 
previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. 
For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? 
err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly string[];\n}\n\nconst VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? 
parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n- For \\`site:<domain>\\` filters, ONLY use domains you are highly confident are real. Safe choices: \\`github.com\\`, \\`stackoverflow.com\\`, \\`reddit.com\\`, \\`news.ycombinator.com\\`, \\`arxiv.org\\`, \\`nvd.nist.gov\\`, \\`pypi.org\\`, \\`npmjs.com\\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names \"Cursor\" \u2192 \\`cursor.com\\`/\\`docs.cursor.com\\` is acceptable). 
If you don't know the product's real docs domain, leave the query open (no \\`site:\\`) instead of guessing.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. \\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
+  "mappings": "… regenerated VLQ mapping data for llm-processor.js (machine-generated, omitted) …",
   "names": []
 }
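For reference, the JSON contract the planner prompt embedded above enforces, sketched as a TypeScript interface. Field names and unions are read straight off the prompt text and the `parseResearchBrief` guards; the type itself is an inferred sketch, not an exported package symbol:

interface ResearchBrief {
  goal_class: "spec" | "bug" | "migration" | "sentiment" | "pricing" | "security" | "synthesis" | "product_launch" | "other";
  goal_class_reason: string;     // one sentence: why this class
  primary_branch: "reddit" | "web" | "both";
  primary_branch_reason: string; // one sentence: why this branch leads
  freshness_window: "days" | "weeks" | "months" | "years";
  first_call_sequence: { tool: "web-search" | "scrape-links"; reason: string }[]; // 1-3 steps
  keyword_seeds: string[];       // 25-50 concrete Google queries, flat list
  iteration_hints: string[];     // 2-5 pointers for the next research pass
  gaps_to_watch: string[];       // 2-5 must-verify questions
  stop_criteria: string[];       // 2-4 checkable done-conditions
}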
package/dist/src/tools/scrape.js
CHANGED
@@ -161,7 +161,7 @@ async function fetchWebBranch(inputs, client) {
     } catch {
       content = result.content;
     }
-    successItems.push({ url: result.url, content, index: origIndex });
+    successItems.push({ url: result.url, content, index: origIndex, rawContent: content });
   }
   return {
     successItems,
@@ -221,7 +221,7 @@ async function fetchDocumentBranch(inputs, jinaClient, scrapeErrorContext) {
       continue;
     }
     successful++;
-    successItems.push({ url: input.url, content: result.content, index: input.origIndex });
+    successItems.push({ url: input.url, content: result.content, index: input.origIndex, rawContent: result.content });
   }
   return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };
 }
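Both hunks above make the same change: the cleaned markdown is snapshotted into `rawContent` at push time. The updated `src/tools/scrape.ts` (embedded in the source map further down) documents the item shape roughly as follows; this sketch paraphrases its JSDoc:

interface ProcessedResult {
  url: string;
  content: string;      // may later be replaced by the LLM extraction
  index: number;        // original position in params.urls[]
  rawContent?: string;  // cleaned markdown captured before the LLM pass, kept so a
                        // terse "Page did not load" LLM reply cannot nuke the body
}

The reddit branch in the next hunk populates the same field.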
@@ -311,12 +311,37 @@ async function fetchRedditBranch(inputs) {
       continue;
     }
     successful++;
-    successItems.push({ url, content: formatRedditPostAsMarkdown(result), index: origIndex });
+    const md = formatRedditPostAsMarkdown(result);
+    successItems.push({ url, content: md, index: origIndex, rawContent: md });
   }
   return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };
 }
+const TERSE_LLM_FAILURE_RE = /^\s*##\s*Matches\s*\n+\s*_Page did not load:\s*([a-z0-9_-]+)_\s*\.?\s*$/i;
+const RAW_FALLBACK_CHAR_CAP = 4e3;
+function detectTerseFailure(llmOutput) {
+  const m = llmOutput.trim().match(TERSE_LLM_FAILURE_RE);
+  return m ? m[1] : null;
+}
+function mergeLlmWithRawFallback(llmOutput, rawContent) {
+  const reason = detectTerseFailure(llmOutput);
+  if (!reason) return llmOutput;
+  const trimmed = rawContent?.trim();
+  if (!trimmed) return llmOutput;
+  const snippet = trimmed.length > RAW_FALLBACK_CHAR_CAP ? trimmed.slice(0, RAW_FALLBACK_CHAR_CAP) + "\n\n\u2026[raw truncated]" : trimmed;
+  return `${llmOutput.trim()}
+
+## Raw content (LLM flagged page as ${reason})
+
+${snippet}`;
+}
 async function processItemsWithLlm(successItems, enhancedInstruction, llmProcessor, reporter) {
   let llmErrors = 0;
+  if (!enhancedInstruction) {
+    if (successItems.length > 0) {
+      mcpLog("info", "Raw mode: extract omitted \u2014 returning cleaned scraped content without LLM pass", "scrape");
+    }
+    return { items: successItems, llmErrors, llmAttempted: 0 };
+  }
   if (!llmProcessor || successItems.length === 0) {
     if (!llmProcessor && successItems.length > 0) {
       mcpLog("warning", "LLM unavailable (LLM_API_KEY not set). Returning raw scraped content.", "scrape");
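A minimal sketch of how the two new helpers behave together (inputs are hypothetical; the return values follow directly from the code above):

// The escape line the regex hunts for: a "## Matches" header whose only body
// is the italicized "_Page did not load: <reason>_" marker.
const terse = "## Matches\n\n_Page did not load: waf_block_";
detectTerseFailure(terse);                          // "waf_block"
detectTerseFailure("## Matches\n- v6.0.9 fixes X"); // null: real matches pass through untouched

// With a terse escape detected and raw content available, the scraped body is
// appended under a labelled section instead of being thrown away:
mergeLlmWithRawFallback(terse, "# Page title\n\nActual scraped body");
// => "## Matches\n\n_Page did not load: waf_block_\n\n## Raw content (LLM flagged page as waf_block)\n\n# Page title\n\nActual scraped body"

Raw bodies longer than `RAW_FALLBACK_CHAR_CAP` (4000 characters) are sliced and suffixed with `…[raw truncated]` before being appended.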
@@ -335,7 +360,12 @@ async function processItemsWithLlm(successItems, enhancedInstruction, llmProcessor, reporter) {
         llmProcessor
       );
       if (llmResult.processed) {
-        return { ...item, content: llmResult.content };
+        const merged = mergeLlmWithRawFallback(llmResult.content, item.rawContent);
+        if (merged !== llmResult.content) {
+          mcpLog("warning", `LLM emitted terse escape line for ${item.url} \u2014 preserved raw fallback`, "scrape");
+          void reporter.log("warning", `llm_terse_escape: ${item.url} \u2014 preserving raw fallback`);
+        }
+        return { ...item, content: merged };
       }
       llmErrors++;
       mcpLog("warning", `LLM extraction failed for ${item.url}: ${llmResult.error || "unknown reason"}`, "scrape");
@@ -455,7 +485,7 @@ async function handleScrapeLinks(params, reporter = NOOP_REPORTER) {
       ]
     );
   }
-  const enhancedInstruction = enhanceExtractionInstruction(params.extract);
+  const enhancedInstruction = params.extract ? enhanceExtractionInstruction(params.extract) : void 0;
   await reporter.progress(35, 100, "Fetching page content");
   const emptyPhase = {
     successItems: [],
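With `extract` optional end-to-end, the handler now has two modes; an illustrative pair of calls (the URL is a placeholder):

// Extraction mode: the facet string drives a per-URL LLM pass.
await handleScrapeLinks({
  urls: ["https://example.com/changelog"],
  extract: "root cause | affected versions | fix | workarounds | timeline",
});

// Raw mode: omit `extract`. enhancedInstruction stays undefined, so
// processItemsWithLlm short-circuits and the cleaned markdown comes back
// without any LLM cost.
await handleScrapeLinks({ urls: ["https://example.com/changelog"] });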
@@ -548,7 +578,7 @@ function registerScrapeLinksTool(server) {
     {
      name: "scrape-links",
       title: "Scrape Links",
-      description: "Fetch many URLs in parallel and run per-URL structured LLM extraction. Auto-detects reddit.com post permalinks and routes them through the Reddit API (threaded post + comments); everything else flows through the HTTP scraper. Safe to call in parallel \u2014 group URLs by context rather than jamming unrelated batches together. Each page returns `## Source`, `## Matches` (verbatim-preserved facts), `## Not found` (explicit gaps), and `## Follow-up signals` (new terms + referenced URLs) that feed the next research loop. Describe the SHAPE of what you want in `extract`, facets separated by `|` (e.g. `root cause | affected versions | fix | workarounds | timeline`).",
+      description: "Fetch many URLs in parallel. With `extract` set, run per-URL structured LLM extraction (each page returns `## Source`, `## Matches` verbatim facts, `## Not found` gaps, `## Follow-up signals` new terms + referenced URLs); omit `extract` for raw mode (cleaned markdown per URL, no LLM pass). Auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post + comments); PDF/DOCX/PPTX/XLSX \u2192 Jina Reader; everything else \u2192 HTTP scraper. Safe to call in parallel \u2014 group URLs by context rather than jamming unrelated batches together. Describe the SHAPE of what you want in `extract`, facets separated by `|` (e.g. `root cause | affected versions | fix | workarounds | timeline`).",
       schema: scrapeLinksParamsSchema,
       outputSchema: scrapeLinksOutputSchema,
       annotations: {
@@ -570,8 +600,11 @@ function registerScrapeLinksTool(server) {
   );
 }
 export {
+  RAW_FALLBACK_CHAR_CAP,
+  detectTerseFailure,
   formatJinaFailure,
   handleScrapeLinks,
+  mergeLlmWithRawFallback,
   registerScrapeLinksTool
 };
 //# sourceMappingURL=scrape.js.map
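The widened export list makes the fallback helpers unit-testable without invoking the tool. A hypothetical test sketch; the deep `dist` import path is an assumption, adjust to however the package actually resolves:

import assert from "node:assert";
import {
  RAW_FALLBACK_CHAR_CAP,
  detectTerseFailure,
  mergeLlmWithRawFallback,
} from "mcp-researchpowerpack/dist/src/tools/scrape.js"; // assumed import path

const terse = "## Matches\n_Page did not load: timeout_";
assert.strictEqual(detectTerseFailure(terse), "timeout");

// Oversized raw bodies are capped at RAW_FALLBACK_CHAR_CAP (4000) characters.
const merged = mergeLlmWithRawFallback(terse, "x".repeat(RAW_FALLBACK_CHAR_CAP + 1));
assert(merged.includes("[raw truncated]"));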
package/dist/src/tools/scrape.js.map
CHANGED
@@ -1,7 +1,7 @@
 {
   "version": 3,
   "sources": ["../../../src/tools/scrape.ts"],
"sourcesContent": ["/**\n * Scrape Links Tool Handler\n *\n * Scrapes many URLs in parallel. Reddit permalinks (reddit.com/r/.../comments/...)\n * are auto-detected and routed through the Reddit API; all other URLs go through\n * the scraper. Both branches feed the same per-URL LLM extraction pipeline.\n *\n * NEVER throws \u2014 every error is returned as a tool-level failure response.\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport {\n SCRAPER,\n CONCURRENCY,\n getCapabilities,\n getMissingEnvMessage,\n parseEnv,\n} from '../config/index.js';\nimport {\n scrapeLinksOutputSchema,\n scrapeLinksParamsSchema,\n type ScrapeLinksParams,\n type ScrapeLinksOutput,\n} from '../schemas/scrape-links.js';\nimport { ScraperClient } from '../clients/scraper.js';\nimport { RedditClient, type PostResult } from '../clients/reddit.js';\nimport { JinaClient } from '../clients/jina.js';\nimport { MarkdownCleaner } from '../services/markdown-cleaner.js';\nimport { createLLMProcessor, processContentWithLLM } from '../services/llm-processor.js';\nimport { removeMetaTags } from '../utils/markdown-formatter.js';\nimport { extractReadableContent } from '../utils/content-extractor.js';\nimport { classifyError, ErrorCode } from '../utils/errors.js';\nimport { isDocumentUrl } from '../utils/source-type.js';\nimport { pMap, pMapSettled } from '../utils/concurrency.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\nconst markdownCleaner = new MarkdownCleaner();\n\nfunction enhanceExtractionInstruction(instruction: string | undefined): string {\n const base = instruction || 'Extract the main content and key information from this page.';\n return `${SCRAPER.EXTRACTION_PREFIX}\\n\\n${base}\\n\\n${SCRAPER.EXTRACTION_SUFFIX}`;\n}\n\n// --- Types ---\n\ninterface ProcessedResult {\n url: string;\n content: string;\n index: number; // original position in params.urls[]\n}\n\ninterface ScrapeMetrics {\n successful: number;\n failed: number;\n totalCredits: number;\n}\n\ninterface ScrapePhaseResult {\n successItems: ProcessedResult[];\n failedContents: string[];\n metrics: ScrapeMetrics;\n}\n\ninterface BranchInput {\n url: string;\n origIndex: number;\n}\n\ninterface ScrapeClients {\n client: ScraperClient;\n jinaClient: JinaClient;\n llmProcessor: ReturnType<typeof createLLMProcessor>;\n}\n\n/**\n * Any URL the web branch decides to hand off to Jina Reader \u2014 either because\n * Scrape.do returned a binary content-type, or because Scrape.do failed\n * outright (non-404 error). 
`scrapeError` is preserved so that, if Jina also\n * fails, the final error message can surface both layers.\n *\n * Genuine 404s are NOT put here \u2014 the URL doesn't exist; Jina won't help.\n */\ninterface JinaFallback {\n url: string;\n origIndex: number;\n reason: 'binary_content' | 'scrape_failed';\n scrapeError?: string;\n}\n\ninterface WebPhaseResult extends ScrapePhaseResult {\n jinaFallbacks: JinaFallback[];\n}\n\n// --- Reddit URL detection ---\n\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+/i;\n\nfunction isRedditUrl(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname);\n } catch {\n return false;\n }\n}\n\nfunction isRedditPostPermalink(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname) && REDDIT_POST_PERMALINK.test(u.pathname);\n } catch {\n return false;\n }\n}\n\n// --- Error helper ---\n\nfunction createScrapeErrorResponse(\n code: string,\n message: string,\n startTime: number,\n retryable = false,\n alternatives?: string[],\n): ToolExecutionResult<ScrapeLinksOutput> {\n return toolFailure(\n `${formatError({\n code,\n message,\n retryable,\n toolName: 'scrape-links',\n howToFix: code === 'NO_URLS' ? ['Provide at least one valid URL'] : undefined,\n alternatives,\n })}\\n\\nExecution time: ${formatDuration(Date.now() - startTime)}`,\n );\n}\n\n// --- URL partitioning ---\n\ninterface PartitionedUrls {\n webInputs: BranchInput[];\n redditInputs: BranchInput[];\n documentInputs: BranchInput[];\n invalidEntries: { url: string; origIndex: number }[];\n}\n\nfunction partitionUrls(urls: string[]): PartitionedUrls {\n const webInputs: BranchInput[] = [];\n const redditInputs: BranchInput[] = [];\n const documentInputs: BranchInput[] = [];\n const invalidEntries: { url: string; origIndex: number }[] = [];\n\n for (let i = 0; i < urls.length; i++) {\n const url = urls[i]!;\n try {\n new URL(url);\n } catch {\n invalidEntries.push({ url, origIndex: i });\n continue;\n }\n // Document URLs (.pdf/.docx/.pptx/.xlsx) go straight to Jina Reader \u2014\n // bypassing Scrape.do because it cannot decode binary bodies. 
Ordered\n // before the Reddit check so a hypothetical PDF on a reddit-adjacent host\n // still takes the document path.\n if (isDocumentUrl(url)) {\n documentInputs.push({ url, origIndex: i });\n } else if (isRedditUrl(url)) {\n redditInputs.push({ url, origIndex: i });\n } else {\n webInputs.push({ url, origIndex: i });\n }\n }\n\n return { webInputs, redditInputs, documentInputs, invalidEntries };\n}\n\n// --- Web branch ---\n\nasync function fetchWebBranch(\n inputs: BranchInput[],\n client: ScraperClient,\n): Promise<WebPhaseResult> {\n if (inputs.length === 0) {\n return {\n successItems: [],\n failedContents: [],\n metrics: { successful: 0, failed: 0, totalCredits: 0 },\n jinaFallbacks: [],\n };\n }\n\n mcpLog('info', `[concurrency] web branch: fanning out ${inputs.length} URL(s) with limit=${CONCURRENCY.SCRAPER}`, 'scrape');\n const urls = inputs.map((i) => i.url);\n const results = await client.scrapeMultiple(urls, { timeout: 60 });\n const urlToIndex = new Map(inputs.map((i) => [i.url, i.origIndex]));\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n const jinaFallbacks: JinaFallback[] = [];\n let successful = 0;\n let failed = 0;\n let totalCredits = 0;\n\n for (let i = 0; i < results.length; i++) {\n const result = results[i];\n const origIndex = inputs[i]!.origIndex;\n if (!result) {\n failed++;\n failedContents.push(`## ${inputs[i]!.url}\\n\\n\u274C No result returned`);\n continue;\n }\n\n // Binary document detected by content-type \u2014 defer to Jina Reader.\n if (result.error?.code === ErrorCode.UNSUPPORTED_BINARY_CONTENT) {\n jinaFallbacks.push({\n url: result.url,\n origIndex: urlToIndex.get(result.url) ?? origIndex,\n reason: 'binary_content',\n });\n continue;\n }\n\n // Scrape.do failure \u2014 only 404s are treated as hard fails (Jina won't\n // help when the page genuinely doesn't exist). Every other failure mode\n // (302 redirect loops, WAF blocks, timeouts, 5xx, service unavailable)\n // gets a second chance through Jina Reader, which uses different IPs\n // and handles many anti-bot surfaces differently.\n const scrapeFailed = Boolean(result.error) || result.statusCode < 200 || result.statusCode >= 300;\n if (scrapeFailed && result.statusCode !== 404) {\n jinaFallbacks.push({\n url: result.url,\n origIndex: urlToIndex.get(result.url) ?? origIndex,\n reason: 'scrape_failed',\n scrapeError: result.error?.message || result.content || `HTTP ${result.statusCode}`,\n });\n continue;\n }\n if (scrapeFailed) {\n failed++;\n failedContents.push(`## ${result.url}\\n\\n\u274C Failed to scrape: HTTP 404 \u2014 Page not found`);\n continue;\n }\n\n successful++;\n totalCredits += result.credits;\n\n let content: string;\n try {\n const readable = extractReadableContent(result.content, result.url);\n const sourceForCleaner = readable.extracted ? readable.content : result.content;\n content = markdownCleaner.processContent(sourceForCleaner);\n } catch {\n content = result.content;\n }\n\n successItems.push({ url: result.url, content, index: origIndex });\n }\n\n return {\n successItems,\n failedContents,\n metrics: { successful, failed, totalCredits },\n jinaFallbacks,\n };\n}\n\n// --- Document branch (Jina Reader) ---\n\n/**\n * Format a Jina-failure line. 
If the URL was deferred here *after* Scrape.do\n * already failed, surface both layers' errors so the caller can see that this\n * isn't just a Jina glitch \u2014 the primary path failed too.\n *\n * Exported for unit testing.\n */\nexport function formatJinaFailure(url: string, jinaError: string, scrapeError?: string): string {\n if (scrapeError) {\n return `## ${url}\\n\\n\u274C Both scrapers failed. Scrape.do: ${scrapeError}. Jina Reader: ${jinaError}.`;\n }\n return `## ${url}\\n\\n\u274C Document conversion failed: ${jinaError}`;\n}\n\nasync function fetchDocumentBranch(\n inputs: BranchInput[],\n jinaClient: JinaClient,\n /** Optional: map url \u2192 original Scrape.do error, for fallback messaging. */\n scrapeErrorContext?: Map<string, string>,\n): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n mcpLog(\n 'info',\n `[concurrency] document branch (jina): converting ${inputs.length} URL(s) with limit=${CONCURRENCY.SCRAPER}`,\n 'scrape',\n );\n\n const results = await pMapSettled(\n inputs,\n (input) => jinaClient.convert({ url: input.url }),\n CONCURRENCY.SCRAPER,\n );\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n let successful = 0;\n let failed = 0;\n\n for (let i = 0; i < results.length; i++) {\n const settled = results[i];\n const input = inputs[i]!;\n const scrapeError = scrapeErrorContext?.get(input.url);\n if (!settled) {\n failed++;\n failedContents.push(formatJinaFailure(input.url, 'No result returned', scrapeError));\n continue;\n }\n if (settled.status === 'rejected') {\n failed++;\n const reason = settled.reason instanceof Error ? settled.reason.message : String(settled.reason);\n failedContents.push(formatJinaFailure(input.url, reason, scrapeError));\n continue;\n }\n\n const result = settled.value;\n if (result.error || result.statusCode < 200 || result.statusCode >= 300) {\n failed++;\n const errorMsg = result.error?.message || `HTTP ${result.statusCode}`;\n failedContents.push(formatJinaFailure(input.url, errorMsg, scrapeError));\n continue;\n }\n\n successful++;\n successItems.push({ url: input.url, content: result.content, index: input.origIndex });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };\n}\n\n// --- Reddit branch ---\n\nfunction formatRedditPostAsMarkdown(result: PostResult): string {\n const { post, comments } = result;\n const lines: string[] = [];\n lines.push(`# ${post.title}`);\n lines.push('');\n lines.push(`**r/${post.subreddit}** \u2022 u/${post.author} \u2022 \u2B06\uFE0F ${post.score} \u2022 \uD83D\uDCAC ${post.commentCount} comments`);\n lines.push(`\uD83D\uDD17 ${post.url}`);\n lines.push('');\n if (post.body) {\n lines.push('## Post content');\n lines.push('');\n lines.push(post.body);\n lines.push('');\n }\n if (comments.length > 0) {\n lines.push(`## Top comments (${comments.length} total)`);\n lines.push('');\n for (const c of comments) {\n const indent = ' '.repeat(c.depth);\n const op = c.isOP ? ' **[OP]**' : '';\n const score = c.score >= 0 ? 
`+${c.score}` : `${c.score}`;\n lines.push(`${indent}- **u/${c.author}**${op} _(${score})_`);\n for (const line of c.body.split('\\n')) {\n lines.push(`${indent} ${line}`);\n }\n lines.push('');\n }\n }\n return lines.join('\\n');\n}\n\nasync function fetchRedditBranch(inputs: BranchInput[]): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n const env = parseEnv();\n if (!env.REDDIT_CLIENT_ID || !env.REDDIT_CLIENT_SECRET) {\n const failedContents = inputs.map(\n (i) => `## ${i.url}\\n\\n\u274C Reddit URL detected, but Reddit API is not configured. Set \\`REDDIT_CLIENT_ID\\` and \\`REDDIT_CLIENT_SECRET\\` in the server env to enable threaded Reddit scraping.`,\n );\n return {\n successItems: [],\n failedContents,\n metrics: { successful: 0, failed: inputs.length, totalCredits: 0 },\n };\n }\n\n // Warn for non-permalink Reddit URLs (subreddit homepages, /new, /top, /hot,\n // user profiles). The Reddit API path we call requires /r/.../comments/... \u2014\n // reject upfront so the caller sees a helpful message instead of a 404.\n const [postInputs, nonPermalinks] = inputs.reduce<[BranchInput[], BranchInput[]]>(\n ([posts, rest], input) => {\n if (isRedditPostPermalink(input.url)) posts.push(input);\n else rest.push(input);\n return [posts, rest];\n },\n [[], []],\n );\n\n const nonPermalinkFailed = nonPermalinks.map(\n (i) => `## ${i.url}\\n\\n\u274C Only Reddit post permalinks (/r/<sub>/comments/<id>/...) are supported. Use web-search with scope:\"reddit\" to discover post permalinks first.`,\n );\n\n if (postInputs.length === 0) {\n return {\n successItems: [],\n failedContents: nonPermalinkFailed,\n metrics: { successful: 0, failed: nonPermalinks.length, totalCredits: 0 },\n };\n }\n\n mcpLog('info', `[concurrency] reddit branch: fetching ${postInputs.length} post(s) with limit=${CONCURRENCY.REDDIT}`, 'scrape');\n const client = new RedditClient(env.REDDIT_CLIENT_ID, env.REDDIT_CLIENT_SECRET);\n const urls = postInputs.map((i) => i.url);\n const batchResult = await client.batchGetPosts(urls, true);\n const urlToIndex = new Map(postInputs.map((i) => [i.url, i.origIndex]));\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [...nonPermalinkFailed];\n let successful = 0;\n let failed = nonPermalinks.length;\n\n for (const [url, result] of batchResult.results) {\n const origIndex = urlToIndex.get(url) ?? -1;\n if (result instanceof Error) {\n failed++;\n failedContents.push(`## ${url}\\n\\n\u274C Reddit fetch failed: ${result.message}`);\n continue;\n }\n successful++;\n successItems.push({ url, content: formatRedditPostAsMarkdown(result), index: origIndex });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };\n}\n\n// --- LLM extraction (shared by both branches) ---\n\nasync function processItemsWithLlm(\n successItems: ProcessedResult[],\n enhancedInstruction: string,\n llmProcessor: ReturnType<typeof createLLMProcessor>,\n reporter: ToolReporter,\n): Promise<{ items: ProcessedResult[]; llmErrors: number; llmAttempted: number }> {\n let llmErrors = 0;\n\n if (!llmProcessor || successItems.length === 0) {\n if (!llmProcessor && successItems.length > 0) {\n mcpLog('warning', 'LLM unavailable (LLM_API_KEY not set). 
Returning raw scraped content.', 'scrape');\n void reporter.log('warning', 'llm_extractor_unreachable: planner not configured; raw scraped content returned');\n }\n return { items: successItems, llmErrors, llmAttempted: 0 };\n }\n\n mcpLog('info', `[concurrency] llm extraction: fanning out ${successItems.length} item(s) with limit=${CONCURRENCY.LLM_EXTRACTION}`, 'scrape');\n\n const llmResults = await pMap(\n successItems,\n async (item) => {\n mcpLog('debug', `LLM extracting ${item.url}...`, 'scrape');\n\n const llmResult = await processContentWithLLM(\n item.content,\n { enabled: true, extract: enhancedInstruction, url: item.url },\n llmProcessor,\n );\n\n if (llmResult.processed) {\n return { ...item, content: llmResult.content };\n }\n\n llmErrors++;\n mcpLog('warning', `LLM extraction failed for ${item.url}: ${llmResult.error || 'unknown reason'}`, 'scrape');\n void reporter.log('warning', `llm_extractor_unreachable: ${item.url} \u2014 ${llmResult.error || 'unknown reason'}`);\n return item;\n },\n CONCURRENCY.LLM_EXTRACTION,\n );\n\n return { items: llmResults, llmErrors, llmAttempted: successItems.length };\n}\n\n// --- Output assembly ---\n\nfunction assembleContentEntries(successItems: ProcessedResult[], failedContents: string[]): string[] {\n const sorted = [...successItems].sort((a, b) => a.index - b.index);\n const contents = [...failedContents];\n for (const item of sorted) {\n let content = item.content;\n try {\n content = removeMetaTags(content);\n } catch {\n // Use content as-is\n }\n contents.push(`## ${item.url}\\n\\n${content}`);\n }\n return contents;\n}\n\nfunction buildScrapeResponse(\n params: ScrapeLinksParams,\n contents: string[],\n metrics: ScrapeMetrics,\n llmErrors: number,\n executionTime: number,\n llmAccounting: { llmAttempted: number; llmSucceeded: boolean },\n): { content: string; structuredContent: ScrapeLinksOutput } {\n const llmExtras: Record<string, string | number> = {};\n if (llmAccounting.llmAttempted > 0) {\n const ok = llmAccounting.llmAttempted - llmErrors;\n llmExtras['LLM extraction'] = `${ok}/${llmAccounting.llmAttempted} succeeded`;\n if (!llmAccounting.llmSucceeded) {\n llmExtras['LLM credit'] = '0 charged (no extraction produced)';\n }\n } else if (llmErrors > 0) {\n llmExtras['LLM extraction failures'] = llmErrors;\n }\n\n const batchHeader = formatBatchHeader({\n title: `Scraped Content (${params.urls.length} URLs)`,\n totalItems: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n extras: {\n 'Credits used': metrics.totalCredits,\n ...llmExtras,\n },\n });\n\n const formattedContent = formatSuccess({\n title: 'Scraping Complete',\n summary: batchHeader,\n data: contents.join('\\n\\n---\\n\\n'),\n metadata: {\n 'Execution time': formatDuration(executionTime),\n },\n });\n\n const metadata: ScrapeLinksOutput['metadata'] = {\n total_items: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n execution_time_ms: executionTime,\n total_credits: metrics.totalCredits,\n };\n return { content: formattedContent, structuredContent: { metadata } };\n}\n\n// --- Handler ---\n\nexport async function handleScrapeLinks(\n params: ScrapeLinksParams,\n reporter: ToolReporter = NOOP_REPORTER,\n): Promise<ToolExecutionResult<ScrapeLinksOutput>> {\n const startTime = Date.now();\n\n if (!params.urls || params.urls.length === 0) {\n return createScrapeErrorResponse('NO_URLS', 'No URLs provided', startTime);\n }\n\n const { webInputs, redditInputs, documentInputs, invalidEntries } = 
partitionUrls(params.urls);\n const validCount = webInputs.length + redditInputs.length + documentInputs.length;\n\n await reporter.log(\n 'info',\n `Partitioned ${params.urls.length} URL(s): ${webInputs.length} web, ${redditInputs.length} reddit, ${documentInputs.length} document, ${invalidEntries.length} invalid`,\n );\n\n if (validCount === 0) {\n return createScrapeErrorResponse(\n 'INVALID_URLS',\n `All ${params.urls.length} URLs are invalid`,\n startTime,\n false,\n [\n 'web-search(queries=[...], extract=\"...\") \u2014 search for valid URLs first, then scrape the results',\n ],\n );\n }\n\n mcpLog(\n 'info',\n `Starting scrape: ${webInputs.length} web + ${redditInputs.length} reddit + ${documentInputs.length} document URL(s)`,\n 'scrape',\n );\n await reporter.progress(15, 100, 'Preparing scraper clients');\n\n // Only initialize the Scrape.do client if we actually have HTML/web URLs.\n // The Jina client is cheap (no auth needed) and always constructed so the\n // document branch and the web\u2192Jina fallback path both work uniformly.\n let clients: ScrapeClients | null = null;\n try {\n const jinaClient = new JinaClient();\n if (webInputs.length > 0) {\n clients = {\n client: new ScraperClient(),\n jinaClient,\n llmProcessor: createLLMProcessor(),\n };\n } else {\n clients = {\n client: null as unknown as ScraperClient,\n jinaClient,\n llmProcessor: createLLMProcessor(),\n };\n }\n } catch (error) {\n const err = classifyError(error);\n return createScrapeErrorResponse(\n 'CLIENT_INIT_FAILED',\n `Failed to initialize scraper: ${err.message}`,\n startTime,\n false,\n [\n 'web-search(queries=[\"topic key findings\", \"topic summary\"], extract=\"key findings and summary\") \u2014 search instead of scraping',\n ],\n );\n }\n\n const enhancedInstruction = enhanceExtractionInstruction(params.extract);\n\n await reporter.progress(35, 100, 'Fetching page content');\n\n // Phase 1 \u2014 run all three branches in parallel. Failures in one branch do\n // not block the others. The web branch may surface URLs to reroute via\n // `jinaFallbacks` (binary content-type OR non-404 Scrape.do failure),\n // which Phase 2 re-runs through Jina Reader.\n const emptyPhase: WebPhaseResult = {\n successItems: [], failedContents: [],\n metrics: { successful: 0, failed: 0, totalCredits: 0 },\n jinaFallbacks: [],\n };\n const [webPhase, redditPhase, documentPhase] = await Promise.all([\n webInputs.length > 0\n ? 
fetchWebBranch(webInputs, clients.client)\n : Promise.resolve<WebPhaseResult>(emptyPhase),\n fetchRedditBranch(redditInputs),\n fetchDocumentBranch(documentInputs, clients.jinaClient),\n ]);\n\n // Phase 2 \u2014 Jina Reader as a fallback for web-branch URLs that either\n // returned binary content or failed outright on Scrape.do.\n let deferredPhase: ScrapePhaseResult = {\n successItems: [], failedContents: [],\n metrics: { successful: 0, failed: 0, totalCredits: 0 },\n };\n if (webPhase.jinaFallbacks.length > 0) {\n const binaryCount = webPhase.jinaFallbacks.filter((f) => f.reason === 'binary_content').length;\n const failedCount = webPhase.jinaFallbacks.length - binaryCount;\n await reporter.log(\n 'info',\n `Rerouting ${webPhase.jinaFallbacks.length} URL(s) to Jina Reader: ${binaryCount} binary, ${failedCount} scrape-failed`,\n );\n const fallbackInputs: BranchInput[] = webPhase.jinaFallbacks.map((f) => ({\n url: f.url,\n origIndex: f.origIndex,\n }));\n const errorContext = new Map<string, string>(\n webPhase.jinaFallbacks\n .filter((f) => f.scrapeError !== undefined)\n .map((f) => [f.url, f.scrapeError as string]),\n );\n deferredPhase = await fetchDocumentBranch(fallbackInputs, clients.jinaClient, errorContext);\n }\n\n const successItems = [\n ...webPhase.successItems,\n ...redditPhase.successItems,\n ...documentPhase.successItems,\n ...deferredPhase.successItems,\n ];\n const invalidFailed = invalidEntries.map(\n ({ url }) => `## ${url}\\n\\n\u274C Invalid URL format`,\n );\n const failedContents = [\n ...invalidFailed,\n ...webPhase.failedContents,\n ...redditPhase.failedContents,\n ...documentPhase.failedContents,\n ...deferredPhase.failedContents,\n ];\n const metrics: ScrapeMetrics = {\n successful:\n webPhase.metrics.successful\n + redditPhase.metrics.successful\n + documentPhase.metrics.successful\n + deferredPhase.metrics.successful,\n failed:\n invalidEntries.length\n + webPhase.metrics.failed\n + redditPhase.metrics.failed\n + documentPhase.metrics.failed\n + deferredPhase.metrics.failed,\n totalCredits: webPhase.metrics.totalCredits,\n };\n\n await reporter.log('info', `Fetched ${metrics.successful} page(s), ${metrics.failed} failed`);\n\n if (successItems.length > 0) {\n await reporter.progress(80, 100, 'Running LLM extraction over fetched pages');\n }\n\n const { items: processedItems, llmErrors, llmAttempted } = await processItemsWithLlm(\n successItems,\n enhancedInstruction,\n clients.llmProcessor,\n reporter,\n );\n\n const contents = assembleContentEntries(processedItems, failedContents);\n const executionTime = Date.now() - startTime;\n\n mcpLog(\n 'info',\n `Completed: ${metrics.successful} successful, ${metrics.failed} failed, ${metrics.totalCredits} credits used`,\n 'scrape',\n );\n\n const llmSucceeded = llmAttempted > 0 && llmErrors < llmAttempted;\n const result = buildScrapeResponse(\n params,\n contents,\n metrics,\n llmErrors,\n executionTime,\n { llmAttempted, llmSucceeded },\n );\n\n if (metrics.successful === 0 && metrics.failed > 0) {\n return toolFailure(result.content);\n }\n\n return toolSuccess(result.content, result.structuredContent);\n}\n\nexport function registerScrapeLinksTool(server: MCPServer): void {\n server.tool(\n {\n name: 'scrape-links',\n title: 'Scrape Links',\n description:\n 'Fetch many URLs in parallel and run per-URL structured LLM extraction. Auto-detects reddit.com post permalinks and routes them through the Reddit API (threaded post + comments); everything else flows through the HTTP scraper. 
Safe to call in parallel \u2014 group URLs by context rather than jamming unrelated batches together. Each page returns `## Source`, `## Matches` (verbatim-preserved facts), `## Not found` (explicit gaps), and `## Follow-up signals` (new terms + referenced URLs) that feed the next research loop. Describe the SHAPE of what you want in `extract`, facets separated by `|` (e.g. `root cause | affected versions | fix | workarounds | timeline`).',\n schema: scrapeLinksParamsSchema,\n outputSchema: scrapeLinksOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().scraping) {\n return toToolResponse(toolFailure(getMissingEnvMessage('scraping')));\n }\n\n const reporter = createToolReporter(ctx, 'scrape-links');\n const result = await handleScrapeLinks(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Scrape failed' : 'Scrape complete');\n return toToolResponse(result);\n },\n );\n}\n"],
|
|
5
|
-
"mappings": "AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,qBAAqB;AAC9B,SAAS,oBAAqC;AAC9C,SAAS,kBAAkB;AAC3B,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,6BAA6B;AAC1D,SAAS,sBAAsB;AAC/B,SAAS,8BAA8B;AACvC,SAAS,eAAe,iBAAiB;AACzC,SAAS,qBAAqB;AAC9B,SAAS,MAAM,mBAAmB;AAClC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAEP,MAAM,kBAAkB,IAAI,gBAAgB;AAE5C,SAAS,6BAA6B,aAAyC;AAC7E,QAAM,OAAO,eAAe;AAC5B,SAAO,GAAG,QAAQ,iBAAiB;AAAA;AAAA,EAAO,IAAI;AAAA;AAAA,EAAO,QAAQ,iBAAiB;AAChF;AAsDA,MAAM,cAAc;AACpB,MAAM,wBAAwB;AAE9B,SAAS,YAAY,KAAsB;AACzC,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ;AAAA,EACpC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,sBAAsB,KAAsB;AACnD,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ,KAAK,sBAAsB,KAAK,EAAE,QAAQ;AAAA,EAC9E,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAIA,SAAS,0BACP,MACA,SACA,WACA,YAAY,OACZ,cACwC;AACxC,SAAO;AAAA,IACL,GAAG,YAAY;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,UAAU,SAAS,YAAY,CAAC,gCAAgC,IAAI;AAAA,MACpE;AAAA,IACF,CAAC,CAAC;AAAA;AAAA,kBAAuB,eAAe,KAAK,IAAI,IAAI,SAAS,CAAC;AAAA,EACjE;AACF;AAWA,SAAS,cAAc,MAAiC;AACtD,QAAM,YAA2B,CAAC;AAClC,QAAM,eAA8B,CAAC;AACrC,QAAM,iBAAgC,CAAC;AACvC,QAAM,iBAAuD,CAAC;AAE9D,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,MAAM,KAAK,CAAC;AAClB,QAAI;AACF,UAAI,IAAI,GAAG;AAAA,IACb,QAAQ;AACN,qBAAe,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AACzC;AAAA,IACF;AAKA,QAAI,cAAc,GAAG,GAAG;AACtB,qBAAe,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IAC3C,WAAW,YAAY,GAAG,GAAG;AAC3B,mBAAa,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACzC,OAAO;AACL,gBAAU,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,cAAc,gBAAgB,eAAe;AACnE;AAIA,eAAe,eACb,QACA,QACyB;AACzB,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAgB,CAAC;AAAA,MACjB,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE;AAAA,MACrD,eAAe,CAAC;AAAA,IAClB;AAAA,EACF;AAEA,SAAO,QAAQ,yCAAyC,OAAO,MAAM,sBAAsB,YAAY,OAAO,IAAI,QAAQ;AAC1H,QAAM,OAAO,OAAO,IAAI,CAAC,MAAM,EAAE,GAAG;AACpC,QAAM,UAAU,MAAM,OAAO,eAAe,MAAM,EAAE,SAAS,GAAG,CAAC;AACjE,QAAM,aAAa,IAAI,IAAI,OAAO,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAElE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,QAAM,gBAAgC,CAAC;AACvC,MAAI,aAAa;AACjB,MAAI,SAAS;AACb,MAAI,eAAe;AAEnB,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,YAAY,OAAO,CAAC,EAAG;AAC7B,QAAI,CAAC,QAAQ;AACX;AACA,qBAAe,KAAK,MAAM,OAAO,CAAC,EAAG,GAAG;AAAA;AAAA,0BAA0B;AAClE;AAAA,IACF;AAGA,QAAI,OAAO,OAAO,SAAS,UAAU,4BAA4B;AAC/D,oBAAc,KAAK;AAAA,QACjB,KAAK,OAAO;AAAA,QACZ,WAAW,WAAW,IAAI,OAAO,GAAG,KAAK;AAAA,QACzC,QAAQ;AAAA,MACV,CAAC;AACD;AAAA,IACF;AAOA,UAAM,eAAe,QAAQ,OAAO,KAAK,KAAK,OAAO,aAAa,OAAO,OAAO,cAAc;AAC9F,QAAI,gBAAgB,OAAO,eAAe,KAAK;AAC7C,oBAAc,KAAK;AAAA,QACjB,KAAK,OAAO;AAAA,QACZ,WAAW,WAAW,IAAI,OAAO,GAAG,KAAK;AAAA,QACzC,QAAQ;AAAA,QACR,aAAa,OAAO,OAAO,WAAW,OAAO,WAAW,QAAQ,OAAO,UAAU;AAAA,MACnF,CAAC;AACD;AAAA,IACF;AACA,QAAI,cAAc;AAChB;AACA,qBAAe,KAAK,MAAM,OAAO,GAAG;AAAA;AAAA,wDAAmD;AACvF;AAAA,IACF;AAEA;AACA,oBAAgB,OAAO;AAEvB,QAAI;AACJ,QAAI;AACF,YAAM,WAAW,uBAAuB,OAAO,SAAS,OAAO,GAAG;AAClE,YAAM,mBAAmB,SAAS,YAAY,SAAS,UAAU,OAAO;AACxE,gBAAU,gBAAgB,eAAe,gBAAgB;AAAA,IAC3D,QAAQ;AACN,gBAAU,OAAO;AAAA,IACnB;AAEA,iBAAa,KAAK,EAAE,KAAK,OAAO,KAAK,SAAS,OAAO,UAAU,CAAC;AAAA,EAClE;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,SAAS,EAAE,YAAY,QAAQ,aAAa;AAAA,IAC5C;AAAA,EACF;AACF;AAWO,SAAS,kBAAkB,KAAa,WAAmB,aAA8B;AAC9F,MAAI,aAAa;AACf,WAAO,MAAM,GAAG;AAAA;AAAA,0CAA0C,WAAW,kBAAkB,SAAS;AAAA,EAClG;AACA,SAAO,MAAM,GAAG;AAAA;AAAA,qCAAqC,SAAS;AAChE;AAEA,eAAe,oBACb,QACA,YAEA,oBAC4B
;AAC5B,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA;AAAA,IACE;AAAA,IACA,oDAAoD,OAAO,MAAM,sBAAsB,YAAY,OAAO;AAAA,IAC1G;AAAA,EACF;AAEA,QAAM,UAAU,MAAM;AAAA,IACpB;AAAA,IACA,CAAC,UAAU,WAAW,QAAQ,EAAE,KAAK,MAAM,IAAI,CAAC;AAAA,IAChD,YAAY;AAAA,EACd;AAEA,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,MAAI,aAAa;AACjB,MAAI,SAAS;AAEb,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,UAAU,QAAQ,CAAC;AACzB,UAAM,QAAQ,OAAO,CAAC;AACtB,UAAM,cAAc,oBAAoB,IAAI,MAAM,GAAG;AACrD,QAAI,CAAC,SAAS;AACZ;AACA,qBAAe,KAAK,kBAAkB,MAAM,KAAK,sBAAsB,WAAW,CAAC;AACnF;AAAA,IACF;AACA,QAAI,QAAQ,WAAW,YAAY;AACjC;AACA,YAAM,SAAS,QAAQ,kBAAkB,QAAQ,QAAQ,OAAO,UAAU,OAAO,QAAQ,MAAM;AAC/F,qBAAe,KAAK,kBAAkB,MAAM,KAAK,QAAQ,WAAW,CAAC;AACrE;AAAA,IACF;AAEA,UAAM,SAAS,QAAQ;AACvB,QAAI,OAAO,SAAS,OAAO,aAAa,OAAO,OAAO,cAAc,KAAK;AACvE;AACA,YAAM,WAAW,OAAO,OAAO,WAAW,QAAQ,OAAO,UAAU;AACnE,qBAAe,KAAK,kBAAkB,MAAM,KAAK,UAAU,WAAW,CAAC;AACvE;AAAA,IACF;AAEA;AACA,iBAAa,KAAK,EAAE,KAAK,MAAM,KAAK,SAAS,OAAO,SAAS,OAAO,MAAM,UAAU,CAAC;AAAA,EACvF;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,cAAc,EAAE,EAAE;AAC1F;AAIA,SAAS,2BAA2B,QAA4B;AAC9D,QAAM,EAAE,MAAM,SAAS,IAAI;AAC3B,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,KAAK,KAAK,EAAE;AAC5B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,OAAO,KAAK,SAAS,eAAU,KAAK,MAAM,wBAAS,KAAK,KAAK,qBAAS,KAAK,YAAY,WAAW;AAC7G,QAAM,KAAK,aAAM,KAAK,GAAG,EAAE;AAC3B,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,MAAM;AACb,UAAM,KAAK,iBAAiB;AAC5B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,KAAK,IAAI;AACpB,UAAM,KAAK,EAAE;AAAA,EACf;AACA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,oBAAoB,SAAS,MAAM,SAAS;AACvD,UAAM,KAAK,EAAE;AACb,eAAW,KAAK,UAAU;AACxB,YAAM,SAAS,KAAK,OAAO,EAAE,KAAK;AAClC,YAAM,KAAK,EAAE,OAAO,cAAc;AAClC,YAAM,QAAQ,EAAE,SAAS,IAAI,IAAI,EAAE,KAAK,KAAK,GAAG,EAAE,KAAK;AACvD,YAAM,KAAK,GAAG,MAAM,SAAS,EAAE,MAAM,KAAK,EAAE,MAAM,KAAK,IAAI;AAC3D,iBAAW,QAAQ,EAAE,KAAK,MAAM,IAAI,GAAG;AACrC,cAAM,KAAK,GAAG,MAAM,KAAK,IAAI,EAAE;AAAA,MACjC;AACA,YAAM,KAAK,EAAE;AAAA,IACf;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,eAAe,kBAAkB,QAAmD;AAClF,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA,QAAM,MAAM,SAAS;AACrB,MAAI,CAAC,IAAI,oBAAoB,CAAC,IAAI,sBAAsB;AACtD,UAAMA,kBAAiB,OAAO;AAAA,MAC5B,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,IACpB;AACA,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAAA;AAAA,MACA,SAAS,EAAE,YAAY,GAAG,QAAQ,OAAO,QAAQ,cAAc,EAAE;AAAA,IACnE;AAAA,EACF;AAKA,QAAM,CAAC,YAAY,aAAa,IAAI,OAAO;AAAA,IACzC,CAAC,CAAC,OAAO,IAAI,GAAG,UAAU;AACxB,UAAI,sBAAsB,MAAM,GAAG,EAAG,OAAM,KAAK,KAAK;AAAA,UACjD,MAAK,KAAK,KAAK;AACpB,aAAO,CAAC,OAAO,IAAI;AAAA,IACrB;AAAA,IACA,CAAC,CAAC,GAAG,CAAC,CAAC;AAAA,EACT;AAEA,QAAM,qBAAqB,cAAc;AAAA,IACvC,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,EACpB;AAEA,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAgB;AAAA,MAChB,SAAS,EAAE,YAAY,GAAG,QAAQ,cAAc,QAAQ,cAAc,EAAE;AAAA,IAC1E;AAAA,EACF;AAEA,SAAO,QAAQ,yCAAyC,WAAW,MAAM,uBAAuB,YAAY,MAAM,IAAI,QAAQ;AAC9H,QAAM,SAAS,IAAI,aAAa,IAAI,kBAAkB,IAAI,oBAAoB;AAC9E,QAAM,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG;AACxC,QAAM,cAAc,MAAM,OAAO,cAAc,MAAM,IAAI;AACzD,QAAM,aAAa,IAAI,IAAI,WAAW,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAEtE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC,GAAG,kBAAkB;AACvD,MAAI,aAAa;AACjB,MAAI,SAAS,cAAc;AAE3B,aAAW,CAAC,KAAK,MAAM,KAAK,YAAY,SAAS;AAC/C,UAAM,YAAY,WAAW,IAAI,GAAG,KAAK;AACzC,QAAI,kBAAkB,OAAO;AAC3B;AACA,qBAAe,KAAK,MAAM,GAAG;AAAA;AAAA,8BAA8B,OAAO,OAAO,EAAE;AAC3E;AAAA,IACF;AACA;AACA,iBAAa,KAAK,EAAE,KAAK,SAAS,2BAA2B,MAAM,GAAG,OAAO,UAAU,CAAC;AAAA,EAC1F;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,cAAc,EAAE,EAAE;
AAC1F;AAIA,eAAe,oBACb,cACA,qBACA,cACA,UACgF;AAChF,MAAI,YAAY;AAEhB,MAAI,CAAC,gBAAgB,aAAa,WAAW,GAAG;AAC9C,QAAI,CAAC,gBAAgB,aAAa,SAAS,GAAG;AAC5C,aAAO,WAAW,yEAAyE,QAAQ;AACnG,WAAK,SAAS,IAAI,WAAW,iFAAiF;AAAA,IAChH;AACA,WAAO,EAAE,OAAO,cAAc,WAAW,cAAc,EAAE;AAAA,EAC3D;AAEA,SAAO,QAAQ,6CAA6C,aAAa,MAAM,uBAAuB,YAAY,cAAc,IAAI,QAAQ;AAE5I,QAAM,aAAa,MAAM;AAAA,IACvB;AAAA,IACA,OAAO,SAAS;AACd,aAAO,SAAS,kBAAkB,KAAK,GAAG,OAAO,QAAQ;AAEzD,YAAM,YAAY,MAAM;AAAA,QACtB,KAAK;AAAA,QACL,EAAE,SAAS,MAAM,SAAS,qBAAqB,KAAK,KAAK,IAAI;AAAA,QAC7D;AAAA,MACF;AAEA,UAAI,UAAU,WAAW;AACvB,eAAO,EAAE,GAAG,MAAM,SAAS,UAAU,QAAQ;AAAA,MAC/C;AAEA;AACA,aAAO,WAAW,6BAA6B,KAAK,GAAG,KAAK,UAAU,SAAS,gBAAgB,IAAI,QAAQ;AAC3G,WAAK,SAAS,IAAI,WAAW,8BAA8B,KAAK,GAAG,WAAM,UAAU,SAAS,gBAAgB,EAAE;AAC9G,aAAO;AAAA,IACT;AAAA,IACA,YAAY;AAAA,EACd;AAEA,SAAO,EAAE,OAAO,YAAY,WAAW,cAAc,aAAa,OAAO;AAC3E;AAIA,SAAS,uBAAuB,cAAiC,gBAAoC;AACnG,QAAM,SAAS,CAAC,GAAG,YAAY,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AACjE,QAAM,WAAW,CAAC,GAAG,cAAc;AACnC,aAAW,QAAQ,QAAQ;AACzB,QAAI,UAAU,KAAK;AACnB,QAAI;AACF,gBAAU,eAAe,OAAO;AAAA,IAClC,QAAQ;AAAA,IAER;AACA,aAAS,KAAK,MAAM,KAAK,GAAG;AAAA;AAAA,EAAO,OAAO,EAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,SAAS,oBACP,QACA,UACA,SACA,WACA,eACA,eAC2D;AAC3D,QAAM,YAA6C,CAAC;AACpD,MAAI,cAAc,eAAe,GAAG;AAClC,UAAM,KAAK,cAAc,eAAe;AACxC,cAAU,gBAAgB,IAAI,GAAG,EAAE,IAAI,cAAc,YAAY;AACjE,QAAI,CAAC,cAAc,cAAc;AAC/B,gBAAU,YAAY,IAAI;AAAA,IAC5B;AAAA,EACF,WAAW,YAAY,GAAG;AACxB,cAAU,yBAAyB,IAAI;AAAA,EACzC;AAEA,QAAM,cAAc,kBAAkB;AAAA,IACpC,OAAO,oBAAoB,OAAO,KAAK,MAAM;AAAA,IAC7C,YAAY,OAAO,KAAK;AAAA,IACxB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,QAAQ;AAAA,MACN,gBAAgB,QAAQ;AAAA,MACxB,GAAG;AAAA,IACL;AAAA,EACF,CAAC;AAED,QAAM,mBAAmB,cAAc;AAAA,IACrC,OAAO;AAAA,IACP,SAAS;AAAA,IACT,MAAM,SAAS,KAAK,aAAa;AAAA,IACjC,UAAU;AAAA,MACR,kBAAkB,eAAe,aAAa;AAAA,IAChD;AAAA,EACF,CAAC;AAED,QAAM,WAA0C;AAAA,IAC9C,aAAa,OAAO,KAAK;AAAA,IACzB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,mBAAmB;AAAA,IACnB,eAAe,QAAQ;AAAA,EACzB;AACA,SAAO,EAAE,SAAS,kBAAkB,mBAAmB,EAAE,SAAS,EAAE;AACtE;AAIA,eAAsB,kBACpB,QACA,WAAyB,eACwB;AACjD,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI,CAAC,OAAO,QAAQ,OAAO,KAAK,WAAW,GAAG;AAC5C,WAAO,0BAA0B,WAAW,oBAAoB,SAAS;AAAA,EAC3E;AAEA,QAAM,EAAE,WAAW,cAAc,gBAAgB,eAAe,IAAI,cAAc,OAAO,IAAI;AAC7F,QAAM,aAAa,UAAU,SAAS,aAAa,SAAS,eAAe;AAE3E,QAAM,SAAS;AAAA,IACb;AAAA,IACA,eAAe,OAAO,KAAK,MAAM,YAAY,UAAU,MAAM,SAAS,aAAa,MAAM,YAAY,eAAe,MAAM,cAAc,eAAe,MAAM;AAAA,EAC/J;AAEA,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,MACL;AAAA,MACA,OAAO,OAAO,KAAK,MAAM;AAAA,MACzB;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA;AAAA,IACE;AAAA,IACA,oBAAoB,UAAU,MAAM,UAAU,aAAa,MAAM,aAAa,eAAe,MAAM;AAAA,IACnG;AAAA,EACF;AACA,QAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAK5D,MAAI,UAAgC;AACpC,MAAI;AACF,UAAM,aAAa,IAAI,WAAW;AAClC,QAAI,UAAU,SAAS,GAAG;AACxB,gBAAU;AAAA,QACR,QAAQ,IAAI,cAAc;AAAA,QAC1B;AAAA,QACA,cAAc,mBAAmB;AAAA,MACnC;AAAA,IACF,OAAO;AACL,gBAAU;AAAA,QACR,QAAQ;AAAA,QACR;AAAA,QACA,cAAc,mBAAmB;AAAA,MACnC;AAAA,IACF;AAAA,EACF,SAAS,OAAO;AACd,UAAM,MAAM,cAAc,KAAK;AAC/B,WAAO;AAAA,MACL;AAAA,MACA,iCAAiC,IAAI,OAAO;AAAA,MAC5C;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,sBAAsB,6BAA6B,OAAO,OAAO;AAEvE,QAAM,SAAS,SAAS,IAAI,KAAK,uBAAuB;AAMxD,QAAM,aAA6B;AAAA,IACjC,cAAc,CAAC;AAAA,IAAG,gBAAgB,CAAC;AAAA,IACnC,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE;AAAA,IACrD,eAAe,CAAC;AAAA,EAClB;AACA,QAAM,CAAC,UAAU,aAAa,aAAa,IAAI,MAAM,QAAQ,IAAI;AAAA,IAC/D,UAAU,SAAS,IACf,eAAe,WAAW,QAAQ,MAAM,IACxC,QAAQ,QAAwB,UAAU;AAAA,IAC9C,kBAAkB,YAAY;AAAA,IAC9B,oBAAoB,gBAAgB,QAAQ,UAAU;AAAA,EACxD,CAAC;AAID,MAAI,gBAAmC;AAAA,IACrC,cAAc,CAAC;AAAA,IAAG,gBAAgB,CAAC;AAAA,IACnC,SAAS,EAAE,YAAY,GAAG,QAAQ,G
AAG,cAAc,EAAE;AAAA,EACvD;AACA,MAAI,SAAS,cAAc,SAAS,GAAG;AACrC,UAAM,cAAc,SAAS,cAAc,OAAO,CAAC,MAAM,EAAE,WAAW,gBAAgB,EAAE;AACxF,UAAM,cAAc,SAAS,cAAc,SAAS;AACpD,UAAM,SAAS;AAAA,MACb;AAAA,MACA,aAAa,SAAS,cAAc,MAAM,2BAA2B,WAAW,YAAY,WAAW;AAAA,IACzG;AACA,UAAM,iBAAgC,SAAS,cAAc,IAAI,CAAC,OAAO;AAAA,MACvE,KAAK,EAAE;AAAA,MACP,WAAW,EAAE;AAAA,IACf,EAAE;AACF,UAAM,eAAe,IAAI;AAAA,MACvB,SAAS,cACN,OAAO,CAAC,MAAM,EAAE,gBAAgB,MAAS,EACzC,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,WAAqB,CAAC;AAAA,IAChD;AACA,oBAAgB,MAAM,oBAAoB,gBAAgB,QAAQ,YAAY,YAAY;AAAA,EAC5F;AAEA,QAAM,eAAe;AAAA,IACnB,GAAG,SAAS;AAAA,IACZ,GAAG,YAAY;AAAA,IACf,GAAG,cAAc;AAAA,IACjB,GAAG,cAAc;AAAA,EACnB;AACA,QAAM,gBAAgB,eAAe;AAAA,IACnC,CAAC,EAAE,IAAI,MAAM,MAAM,GAAG;AAAA;AAAA;AAAA,EACxB;AACA,QAAM,iBAAiB;AAAA,IACrB,GAAG;AAAA,IACH,GAAG,SAAS;AAAA,IACZ,GAAG,YAAY;AAAA,IACf,GAAG,cAAc;AAAA,IACjB,GAAG,cAAc;AAAA,EACnB;AACA,QAAM,UAAyB;AAAA,IAC7B,YACE,SAAS,QAAQ,aACf,YAAY,QAAQ,aACpB,cAAc,QAAQ,aACtB,cAAc,QAAQ;AAAA,IAC1B,QACE,eAAe,SACb,SAAS,QAAQ,SACjB,YAAY,QAAQ,SACpB,cAAc,QAAQ,SACtB,cAAc,QAAQ;AAAA,IAC1B,cAAc,SAAS,QAAQ;AAAA,EACjC;AAEA,QAAM,SAAS,IAAI,QAAQ,WAAW,QAAQ,UAAU,aAAa,QAAQ,MAAM,SAAS;AAE5F,MAAI,aAAa,SAAS,GAAG;AAC3B,UAAM,SAAS,SAAS,IAAI,KAAK,2CAA2C;AAAA,EAC9E;AAEA,QAAM,EAAE,OAAO,gBAAgB,WAAW,aAAa,IAAI,MAAM;AAAA,IAC/D;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,EACF;AAEA,QAAM,WAAW,uBAAuB,gBAAgB,cAAc;AACtE,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC;AAAA,IACE;AAAA,IACA,cAAc,QAAQ,UAAU,gBAAgB,QAAQ,MAAM,YAAY,QAAQ,YAAY;AAAA,IAC9F;AAAA,EACF;AAEA,QAAM,eAAe,eAAe,KAAK,YAAY;AACrD,QAAM,SAAS;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,EAAE,cAAc,aAAa;AAAA,EAC/B;AAEA,MAAI,QAAQ,eAAe,KAAK,QAAQ,SAAS,GAAG;AAClD,WAAO,YAAY,OAAO,OAAO;AAAA,EACnC;AAEA,SAAO,YAAY,OAAO,SAAS,OAAO,iBAAiB;AAC7D;AAEO,SAAS,wBAAwB,QAAyB;AAC/D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,UAAU;AAC/B,eAAO,eAAe,YAAY,qBAAqB,UAAU,CAAC,CAAC;AAAA,MACrE;AAEA,YAAM,WAAW,mBAAmB,KAAK,cAAc;AACvD,YAAM,SAAS,MAAM,kBAAkB,MAAM,QAAQ;AAErD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
+
"sourcesContent": ["/**\n * Scrape Links Tool Handler\n *\n * Scrapes many URLs in parallel. Reddit permalinks (reddit.com/r/.../comments/...)\n * are auto-detected and routed through the Reddit API; all other URLs go through\n * the scraper. Both branches feed the same per-URL LLM extraction pipeline.\n *\n * NEVER throws \u2014 every error is returned as a tool-level failure response.\n */\n\nimport type { MCPServer } from 'mcp-use/server';\n\nimport {\n SCRAPER,\n CONCURRENCY,\n getCapabilities,\n getMissingEnvMessage,\n parseEnv,\n} from '../config/index.js';\nimport {\n scrapeLinksOutputSchema,\n scrapeLinksParamsSchema,\n type ScrapeLinksParams,\n type ScrapeLinksOutput,\n} from '../schemas/scrape-links.js';\nimport { ScraperClient } from '../clients/scraper.js';\nimport { RedditClient, type PostResult } from '../clients/reddit.js';\nimport { JinaClient } from '../clients/jina.js';\nimport { MarkdownCleaner } from '../services/markdown-cleaner.js';\nimport { createLLMProcessor, processContentWithLLM } from '../services/llm-processor.js';\nimport { removeMetaTags } from '../utils/markdown-formatter.js';\nimport { extractReadableContent } from '../utils/content-extractor.js';\nimport { classifyError, ErrorCode } from '../utils/errors.js';\nimport { isDocumentUrl } from '../utils/source-type.js';\nimport { pMap, pMapSettled } from '../utils/concurrency.js';\nimport {\n mcpLog,\n formatSuccess,\n formatError,\n formatBatchHeader,\n formatDuration,\n} from './utils.js';\nimport {\n createToolReporter,\n NOOP_REPORTER,\n toolFailure,\n toolSuccess,\n toToolResponse,\n type ToolExecutionResult,\n type ToolReporter,\n} from './mcp-helpers.js';\n\nconst markdownCleaner = new MarkdownCleaner();\n\nfunction enhanceExtractionInstruction(instruction: string | undefined): string {\n const base = instruction || 'Extract the main content and key information from this page.';\n return `${SCRAPER.EXTRACTION_PREFIX}\\n\\n${base}\\n\\n${SCRAPER.EXTRACTION_SUFFIX}`;\n}\n\n// --- Types ---\n\ninterface ProcessedResult {\n url: string;\n content: string;\n index: number; // original position in params.urls[]\n /**\n * Cleaned markdown captured before LLM extraction. Preserved so the handler\n * can fall back to it when the LLM emits the terse \"Page did not load: X\"\n * escape line and would otherwise nuke the scraped body.\n */\n rawContent?: string;\n}\n\ninterface ScrapeMetrics {\n successful: number;\n failed: number;\n totalCredits: number;\n}\n\ninterface ScrapePhaseResult {\n successItems: ProcessedResult[];\n failedContents: string[];\n metrics: ScrapeMetrics;\n}\n\ninterface BranchInput {\n url: string;\n origIndex: number;\n}\n\ninterface ScrapeClients {\n client: ScraperClient;\n jinaClient: JinaClient;\n llmProcessor: ReturnType<typeof createLLMProcessor>;\n}\n\n/**\n * Any URL the web branch decides to hand off to Jina Reader \u2014 either because\n * Scrape.do returned a binary content-type, or because Scrape.do failed\n * outright (non-404 error). 
`scrapeError` is preserved so that, if Jina also\n * fails, the final error message can surface both layers.\n *\n * Genuine 404s are NOT put here \u2014 the URL doesn't exist; Jina won't help.\n */\ninterface JinaFallback {\n url: string;\n origIndex: number;\n reason: 'binary_content' | 'scrape_failed';\n scrapeError?: string;\n}\n\ninterface WebPhaseResult extends ScrapePhaseResult {\n jinaFallbacks: JinaFallback[];\n}\n\n// --- Reddit URL detection ---\n\nconst REDDIT_HOST = /(?:^|\\.)reddit\\.com$/i;\nconst REDDIT_POST_PERMALINK = /\\/r\\/[^/]+\\/comments\\/[a-z0-9]+/i;\n\nfunction isRedditUrl(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname);\n } catch {\n return false;\n }\n}\n\nfunction isRedditPostPermalink(url: string): boolean {\n try {\n const u = new URL(url);\n return REDDIT_HOST.test(u.hostname) && REDDIT_POST_PERMALINK.test(u.pathname);\n } catch {\n return false;\n }\n}\n\n// --- Error helper ---\n\nfunction createScrapeErrorResponse(\n code: string,\n message: string,\n startTime: number,\n retryable = false,\n alternatives?: string[],\n): ToolExecutionResult<ScrapeLinksOutput> {\n return toolFailure(\n `${formatError({\n code,\n message,\n retryable,\n toolName: 'scrape-links',\n howToFix: code === 'NO_URLS' ? ['Provide at least one valid URL'] : undefined,\n alternatives,\n })}\\n\\nExecution time: ${formatDuration(Date.now() - startTime)}`,\n );\n}\n\n// --- URL partitioning ---\n\ninterface PartitionedUrls {\n webInputs: BranchInput[];\n redditInputs: BranchInput[];\n documentInputs: BranchInput[];\n invalidEntries: { url: string; origIndex: number }[];\n}\n\nfunction partitionUrls(urls: string[]): PartitionedUrls {\n const webInputs: BranchInput[] = [];\n const redditInputs: BranchInput[] = [];\n const documentInputs: BranchInput[] = [];\n const invalidEntries: { url: string; origIndex: number }[] = [];\n\n for (let i = 0; i < urls.length; i++) {\n const url = urls[i]!;\n try {\n new URL(url);\n } catch {\n invalidEntries.push({ url, origIndex: i });\n continue;\n }\n // Document URLs (.pdf/.docx/.pptx/.xlsx) go straight to Jina Reader \u2014\n // bypassing Scrape.do because it cannot decode binary bodies. 
Ordered\n // before the Reddit check so a hypothetical PDF on a reddit-adjacent host\n // still takes the document path.\n if (isDocumentUrl(url)) {\n documentInputs.push({ url, origIndex: i });\n } else if (isRedditUrl(url)) {\n redditInputs.push({ url, origIndex: i });\n } else {\n webInputs.push({ url, origIndex: i });\n }\n }\n\n return { webInputs, redditInputs, documentInputs, invalidEntries };\n}\n\n// --- Web branch ---\n\nasync function fetchWebBranch(\n inputs: BranchInput[],\n client: ScraperClient,\n): Promise<WebPhaseResult> {\n if (inputs.length === 0) {\n return {\n successItems: [],\n failedContents: [],\n metrics: { successful: 0, failed: 0, totalCredits: 0 },\n jinaFallbacks: [],\n };\n }\n\n mcpLog('info', `[concurrency] web branch: fanning out ${inputs.length} URL(s) with limit=${CONCURRENCY.SCRAPER}`, 'scrape');\n const urls = inputs.map((i) => i.url);\n const results = await client.scrapeMultiple(urls, { timeout: 60 });\n const urlToIndex = new Map(inputs.map((i) => [i.url, i.origIndex]));\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n const jinaFallbacks: JinaFallback[] = [];\n let successful = 0;\n let failed = 0;\n let totalCredits = 0;\n\n for (let i = 0; i < results.length; i++) {\n const result = results[i];\n const origIndex = inputs[i]!.origIndex;\n if (!result) {\n failed++;\n failedContents.push(`## ${inputs[i]!.url}\\n\\n\u274C No result returned`);\n continue;\n }\n\n // Binary document detected by content-type \u2014 defer to Jina Reader.\n if (result.error?.code === ErrorCode.UNSUPPORTED_BINARY_CONTENT) {\n jinaFallbacks.push({\n url: result.url,\n origIndex: urlToIndex.get(result.url) ?? origIndex,\n reason: 'binary_content',\n });\n continue;\n }\n\n // Scrape.do failure \u2014 only 404s are treated as hard fails (Jina won't\n // help when the page genuinely doesn't exist). Every other failure mode\n // (302 redirect loops, WAF blocks, timeouts, 5xx, service unavailable)\n // gets a second chance through Jina Reader, which uses different IPs\n // and handles many anti-bot surfaces differently.\n const scrapeFailed = Boolean(result.error) || result.statusCode < 200 || result.statusCode >= 300;\n if (scrapeFailed && result.statusCode !== 404) {\n jinaFallbacks.push({\n url: result.url,\n origIndex: urlToIndex.get(result.url) ?? origIndex,\n reason: 'scrape_failed',\n scrapeError: result.error?.message || result.content || `HTTP ${result.statusCode}`,\n });\n continue;\n }\n if (scrapeFailed) {\n failed++;\n failedContents.push(`## ${result.url}\\n\\n\u274C Failed to scrape: HTTP 404 \u2014 Page not found`);\n continue;\n }\n\n successful++;\n totalCredits += result.credits;\n\n let content: string;\n try {\n const readable = extractReadableContent(result.content, result.url);\n const sourceForCleaner = readable.extracted ? readable.content : result.content;\n content = markdownCleaner.processContent(sourceForCleaner);\n } catch {\n content = result.content;\n }\n\n successItems.push({ url: result.url, content, index: origIndex, rawContent: content });\n }\n\n return {\n successItems,\n failedContents,\n metrics: { successful, failed, totalCredits },\n jinaFallbacks,\n };\n}\n\n// --- Document branch (Jina Reader) ---\n\n/**\n * Format a Jina-failure line. 
If the URL was deferred here *after* Scrape.do\n * already failed, surface both layers' errors so the caller can see that this\n * isn't just a Jina glitch \u2014 the primary path failed too.\n *\n * Exported for unit testing.\n */\nexport function formatJinaFailure(url: string, jinaError: string, scrapeError?: string): string {\n if (scrapeError) {\n return `## ${url}\\n\\n\u274C Both scrapers failed. Scrape.do: ${scrapeError}. Jina Reader: ${jinaError}.`;\n }\n return `## ${url}\\n\\n\u274C Document conversion failed: ${jinaError}`;\n}\n\nasync function fetchDocumentBranch(\n inputs: BranchInput[],\n jinaClient: JinaClient,\n /** Optional: map url \u2192 original Scrape.do error, for fallback messaging. */\n scrapeErrorContext?: Map<string, string>,\n): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n mcpLog(\n 'info',\n `[concurrency] document branch (jina): converting ${inputs.length} URL(s) with limit=${CONCURRENCY.SCRAPER}`,\n 'scrape',\n );\n\n const results = await pMapSettled(\n inputs,\n (input) => jinaClient.convert({ url: input.url }),\n CONCURRENCY.SCRAPER,\n );\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [];\n let successful = 0;\n let failed = 0;\n\n for (let i = 0; i < results.length; i++) {\n const settled = results[i];\n const input = inputs[i]!;\n const scrapeError = scrapeErrorContext?.get(input.url);\n if (!settled) {\n failed++;\n failedContents.push(formatJinaFailure(input.url, 'No result returned', scrapeError));\n continue;\n }\n if (settled.status === 'rejected') {\n failed++;\n const reason = settled.reason instanceof Error ? settled.reason.message : String(settled.reason);\n failedContents.push(formatJinaFailure(input.url, reason, scrapeError));\n continue;\n }\n\n const result = settled.value;\n if (result.error || result.statusCode < 200 || result.statusCode >= 300) {\n failed++;\n const errorMsg = result.error?.message || `HTTP ${result.statusCode}`;\n failedContents.push(formatJinaFailure(input.url, errorMsg, scrapeError));\n continue;\n }\n\n successful++;\n successItems.push({ url: input.url, content: result.content, index: input.origIndex, rawContent: result.content });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };\n}\n\n// --- Reddit branch ---\n\nfunction formatRedditPostAsMarkdown(result: PostResult): string {\n const { post, comments } = result;\n const lines: string[] = [];\n lines.push(`# ${post.title}`);\n lines.push('');\n lines.push(`**r/${post.subreddit}** \u2022 u/${post.author} \u2022 \u2B06\uFE0F ${post.score} \u2022 \uD83D\uDCAC ${post.commentCount} comments`);\n lines.push(`\uD83D\uDD17 ${post.url}`);\n lines.push('');\n if (post.body) {\n lines.push('## Post content');\n lines.push('');\n lines.push(post.body);\n lines.push('');\n }\n if (comments.length > 0) {\n lines.push(`## Top comments (${comments.length} total)`);\n lines.push('');\n for (const c of comments) {\n const indent = ' '.repeat(c.depth);\n const op = c.isOP ? ' **[OP]**' : '';\n const score = c.score >= 0 ? 
`+${c.score}` : `${c.score}`;\n lines.push(`${indent}- **u/${c.author}**${op} _(${score})_`);\n for (const line of c.body.split('\\n')) {\n lines.push(`${indent} ${line}`);\n }\n lines.push('');\n }\n }\n return lines.join('\\n');\n}\n\nasync function fetchRedditBranch(inputs: BranchInput[]): Promise<ScrapePhaseResult> {\n if (inputs.length === 0) {\n return { successItems: [], failedContents: [], metrics: { successful: 0, failed: 0, totalCredits: 0 } };\n }\n\n const env = parseEnv();\n if (!env.REDDIT_CLIENT_ID || !env.REDDIT_CLIENT_SECRET) {\n const failedContents = inputs.map(\n (i) => `## ${i.url}\\n\\n\u274C Reddit URL detected, but Reddit API is not configured. Set \\`REDDIT_CLIENT_ID\\` and \\`REDDIT_CLIENT_SECRET\\` in the server env to enable threaded Reddit scraping.`,\n );\n return {\n successItems: [],\n failedContents,\n metrics: { successful: 0, failed: inputs.length, totalCredits: 0 },\n };\n }\n\n // Warn for non-permalink Reddit URLs (subreddit homepages, /new, /top, /hot,\n // user profiles). The Reddit API path we call requires /r/.../comments/... \u2014\n // reject upfront so the caller sees a helpful message instead of a 404.\n const [postInputs, nonPermalinks] = inputs.reduce<[BranchInput[], BranchInput[]]>(\n ([posts, rest], input) => {\n if (isRedditPostPermalink(input.url)) posts.push(input);\n else rest.push(input);\n return [posts, rest];\n },\n [[], []],\n );\n\n const nonPermalinkFailed = nonPermalinks.map(\n (i) => `## ${i.url}\\n\\n\u274C Only Reddit post permalinks (/r/<sub>/comments/<id>/...) are supported. Use web-search with scope:\"reddit\" to discover post permalinks first.`,\n );\n\n if (postInputs.length === 0) {\n return {\n successItems: [],\n failedContents: nonPermalinkFailed,\n metrics: { successful: 0, failed: nonPermalinks.length, totalCredits: 0 },\n };\n }\n\n mcpLog('info', `[concurrency] reddit branch: fetching ${postInputs.length} post(s) with limit=${CONCURRENCY.REDDIT}`, 'scrape');\n const client = new RedditClient(env.REDDIT_CLIENT_ID, env.REDDIT_CLIENT_SECRET);\n const urls = postInputs.map((i) => i.url);\n const batchResult = await client.batchGetPosts(urls, true);\n const urlToIndex = new Map(postInputs.map((i) => [i.url, i.origIndex]));\n\n const successItems: ProcessedResult[] = [];\n const failedContents: string[] = [...nonPermalinkFailed];\n let successful = 0;\n let failed = nonPermalinks.length;\n\n for (const [url, result] of batchResult.results) {\n const origIndex = urlToIndex.get(url) ?? -1;\n if (result instanceof Error) {\n failed++;\n failedContents.push(`## ${url}\\n\\n\u274C Reddit fetch failed: ${result.message}`);\n continue;\n }\n successful++;\n const md = formatRedditPostAsMarkdown(result);\n successItems.push({ url, content: md, index: origIndex, rawContent: md });\n }\n\n return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };\n}\n\n// --- Terse-LLM-escape detection + raw fallback merger ---\n\n/**\n * The LLM extraction prompt tells the model to emit a single terse line when\n * a page \"clearly failed to load\" (login walls, JS-render-empty, paywalls,\n * etc.). In practice the LLM over-triggers this on partially-rendered pages,\n * causing scrape-links to return a one-line verdict and discard the cleaned\n * markdown. 
This detector + merger keep the verdict but re-attach a capped\n * slice of the raw markdown so the caller always has something to work with.\n */\nconst TERSE_LLM_FAILURE_RE =\n /^\\s*##\\s*Matches\\s*\\n+\\s*_Page did not load:\\s*([a-z0-9_-]+)_\\s*\\.?\\s*$/i;\n\n/** Cap on the raw-markdown slice appended under \"## Raw content ...\" */\nexport const RAW_FALLBACK_CHAR_CAP = 4000;\n\n/**\n * If `llmOutput` is exactly the terse \"## Matches\\n_Page did not load: X_\"\n * line, return the reason token (e.g. \"login-wall\"). Otherwise null.\n */\nexport function detectTerseFailure(llmOutput: string): string | null {\n const m = llmOutput.trim().match(TERSE_LLM_FAILURE_RE);\n return m ? m[1]! : null;\n}\n\n/**\n * When the LLM emitted the terse escape line, append a capped slice of the\n * raw cleaned markdown under a `## Raw content (...)` section so the caller\n * still has the actual scraped body to inspect. No-op otherwise.\n */\nexport function mergeLlmWithRawFallback(\n llmOutput: string,\n rawContent: string | undefined,\n): string {\n const reason = detectTerseFailure(llmOutput);\n if (!reason) return llmOutput;\n const trimmed = rawContent?.trim();\n if (!trimmed) return llmOutput;\n const snippet =\n trimmed.length > RAW_FALLBACK_CHAR_CAP\n ? trimmed.slice(0, RAW_FALLBACK_CHAR_CAP) + '\\n\\n\u2026[raw truncated]'\n : trimmed;\n return `${llmOutput.trim()}\\n\\n## Raw content (LLM flagged page as ${reason})\\n\\n${snippet}`;\n}\n\n// --- LLM extraction (shared by both branches) ---\n\nasync function processItemsWithLlm(\n successItems: ProcessedResult[],\n enhancedInstruction: string | undefined,\n llmProcessor: ReturnType<typeof createLLMProcessor>,\n reporter: ToolReporter,\n): Promise<{ items: ProcessedResult[]; llmErrors: number; llmAttempted: number }> {\n let llmErrors = 0;\n\n // Raw-mode bypass: caller omitted `extract` \u2192 return cleaned markdown as-is.\n if (!enhancedInstruction) {\n if (successItems.length > 0) {\n mcpLog('info', 'Raw mode: extract omitted \u2014 returning cleaned scraped content without LLM pass', 'scrape');\n }\n return { items: successItems, llmErrors, llmAttempted: 0 };\n }\n\n if (!llmProcessor || successItems.length === 0) {\n if (!llmProcessor && successItems.length > 0) {\n mcpLog('warning', 'LLM unavailable (LLM_API_KEY not set). 
Returning raw scraped content.', 'scrape');\n void reporter.log('warning', 'llm_extractor_unreachable: planner not configured; raw scraped content returned');\n }\n return { items: successItems, llmErrors, llmAttempted: 0 };\n }\n\n mcpLog('info', `[concurrency] llm extraction: fanning out ${successItems.length} item(s) with limit=${CONCURRENCY.LLM_EXTRACTION}`, 'scrape');\n\n const llmResults = await pMap(\n successItems,\n async (item) => {\n mcpLog('debug', `LLM extracting ${item.url}...`, 'scrape');\n\n const llmResult = await processContentWithLLM(\n item.content,\n { enabled: true, extract: enhancedInstruction, url: item.url },\n llmProcessor,\n );\n\n if (llmResult.processed) {\n const merged = mergeLlmWithRawFallback(llmResult.content, item.rawContent);\n if (merged !== llmResult.content) {\n mcpLog('warning', `LLM emitted terse escape line for ${item.url} \u2014 preserved raw fallback`, 'scrape');\n void reporter.log('warning', `llm_terse_escape: ${item.url} \u2014 preserving raw fallback`);\n }\n return { ...item, content: merged };\n }\n\n llmErrors++;\n mcpLog('warning', `LLM extraction failed for ${item.url}: ${llmResult.error || 'unknown reason'}`, 'scrape');\n void reporter.log('warning', `llm_extractor_unreachable: ${item.url} \u2014 ${llmResult.error || 'unknown reason'}`);\n return item;\n },\n CONCURRENCY.LLM_EXTRACTION,\n );\n\n return { items: llmResults, llmErrors, llmAttempted: successItems.length };\n}\n\n// --- Output assembly ---\n\nfunction assembleContentEntries(successItems: ProcessedResult[], failedContents: string[]): string[] {\n const sorted = [...successItems].sort((a, b) => a.index - b.index);\n const contents = [...failedContents];\n for (const item of sorted) {\n let content = item.content;\n try {\n content = removeMetaTags(content);\n } catch {\n // Use content as-is\n }\n contents.push(`## ${item.url}\\n\\n${content}`);\n }\n return contents;\n}\n\nfunction buildScrapeResponse(\n params: ScrapeLinksParams,\n contents: string[],\n metrics: ScrapeMetrics,\n llmErrors: number,\n executionTime: number,\n llmAccounting: { llmAttempted: number; llmSucceeded: boolean },\n): { content: string; structuredContent: ScrapeLinksOutput } {\n const llmExtras: Record<string, string | number> = {};\n if (llmAccounting.llmAttempted > 0) {\n const ok = llmAccounting.llmAttempted - llmErrors;\n llmExtras['LLM extraction'] = `${ok}/${llmAccounting.llmAttempted} succeeded`;\n if (!llmAccounting.llmSucceeded) {\n llmExtras['LLM credit'] = '0 charged (no extraction produced)';\n }\n } else if (llmErrors > 0) {\n llmExtras['LLM extraction failures'] = llmErrors;\n }\n\n const batchHeader = formatBatchHeader({\n title: `Scraped Content (${params.urls.length} URLs)`,\n totalItems: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n extras: {\n 'Credits used': metrics.totalCredits,\n ...llmExtras,\n },\n });\n\n const formattedContent = formatSuccess({\n title: 'Scraping Complete',\n summary: batchHeader,\n data: contents.join('\\n\\n---\\n\\n'),\n metadata: {\n 'Execution time': formatDuration(executionTime),\n },\n });\n\n const metadata: ScrapeLinksOutput['metadata'] = {\n total_items: params.urls.length,\n successful: metrics.successful,\n failed: metrics.failed,\n execution_time_ms: executionTime,\n total_credits: metrics.totalCredits,\n };\n return { content: formattedContent, structuredContent: { metadata } };\n}\n\n// --- Handler ---\n\nexport async function handleScrapeLinks(\n params: ScrapeLinksParams,\n reporter: ToolReporter = 
NOOP_REPORTER,\n): Promise<ToolExecutionResult<ScrapeLinksOutput>> {\n const startTime = Date.now();\n\n if (!params.urls || params.urls.length === 0) {\n return createScrapeErrorResponse('NO_URLS', 'No URLs provided', startTime);\n }\n\n const { webInputs, redditInputs, documentInputs, invalidEntries } = partitionUrls(params.urls);\n const validCount = webInputs.length + redditInputs.length + documentInputs.length;\n\n await reporter.log(\n 'info',\n `Partitioned ${params.urls.length} URL(s): ${webInputs.length} web, ${redditInputs.length} reddit, ${documentInputs.length} document, ${invalidEntries.length} invalid`,\n );\n\n if (validCount === 0) {\n return createScrapeErrorResponse(\n 'INVALID_URLS',\n `All ${params.urls.length} URLs are invalid`,\n startTime,\n false,\n [\n 'web-search(queries=[...], extract=\"...\") \u2014 search for valid URLs first, then scrape the results',\n ],\n );\n }\n\n mcpLog(\n 'info',\n `Starting scrape: ${webInputs.length} web + ${redditInputs.length} reddit + ${documentInputs.length} document URL(s)`,\n 'scrape',\n );\n await reporter.progress(15, 100, 'Preparing scraper clients');\n\n // Only initialize the Scrape.do client if we actually have HTML/web URLs.\n // The Jina client is cheap (no auth needed) and always constructed so the\n // document branch and the web\u2192Jina fallback path both work uniformly.\n let clients: ScrapeClients | null = null;\n try {\n const jinaClient = new JinaClient();\n if (webInputs.length > 0) {\n clients = {\n client: new ScraperClient(),\n jinaClient,\n llmProcessor: createLLMProcessor(),\n };\n } else {\n clients = {\n client: null as unknown as ScraperClient,\n jinaClient,\n llmProcessor: createLLMProcessor(),\n };\n }\n } catch (error) {\n const err = classifyError(error);\n return createScrapeErrorResponse(\n 'CLIENT_INIT_FAILED',\n `Failed to initialize scraper: ${err.message}`,\n startTime,\n false,\n [\n 'web-search(queries=[\"topic key findings\", \"topic summary\"], extract=\"key findings and summary\") \u2014 search instead of scraping',\n ],\n );\n }\n\n // Only enhance + run LLM when caller supplied an extract instruction.\n // Undefined \u2192 raw mode (cleaned markdown returned without LLM pass).\n const enhancedInstruction = params.extract\n ? enhanceExtractionInstruction(params.extract)\n : undefined;\n\n await reporter.progress(35, 100, 'Fetching page content');\n\n // Phase 1 \u2014 run all three branches in parallel. Failures in one branch do\n // not block the others. The web branch may surface URLs to reroute via\n // `jinaFallbacks` (binary content-type OR non-404 Scrape.do failure),\n // which Phase 2 re-runs through Jina Reader.\n const emptyPhase: WebPhaseResult = {\n successItems: [], failedContents: [],\n metrics: { successful: 0, failed: 0, totalCredits: 0 },\n jinaFallbacks: [],\n };\n const [webPhase, redditPhase, documentPhase] = await Promise.all([\n webInputs.length > 0\n ? 
fetchWebBranch(webInputs, clients.client)\n : Promise.resolve<WebPhaseResult>(emptyPhase),\n fetchRedditBranch(redditInputs),\n fetchDocumentBranch(documentInputs, clients.jinaClient),\n ]);\n\n // Phase 2 \u2014 Jina Reader as a fallback for web-branch URLs that either\n // returned binary content or failed outright on Scrape.do.\n let deferredPhase: ScrapePhaseResult = {\n successItems: [], failedContents: [],\n metrics: { successful: 0, failed: 0, totalCredits: 0 },\n };\n if (webPhase.jinaFallbacks.length > 0) {\n const binaryCount = webPhase.jinaFallbacks.filter((f) => f.reason === 'binary_content').length;\n const failedCount = webPhase.jinaFallbacks.length - binaryCount;\n await reporter.log(\n 'info',\n `Rerouting ${webPhase.jinaFallbacks.length} URL(s) to Jina Reader: ${binaryCount} binary, ${failedCount} scrape-failed`,\n );\n const fallbackInputs: BranchInput[] = webPhase.jinaFallbacks.map((f) => ({\n url: f.url,\n origIndex: f.origIndex,\n }));\n const errorContext = new Map<string, string>(\n webPhase.jinaFallbacks\n .filter((f) => f.scrapeError !== undefined)\n .map((f) => [f.url, f.scrapeError as string]),\n );\n deferredPhase = await fetchDocumentBranch(fallbackInputs, clients.jinaClient, errorContext);\n }\n\n const successItems = [\n ...webPhase.successItems,\n ...redditPhase.successItems,\n ...documentPhase.successItems,\n ...deferredPhase.successItems,\n ];\n const invalidFailed = invalidEntries.map(\n ({ url }) => `## ${url}\\n\\n\u274C Invalid URL format`,\n );\n const failedContents = [\n ...invalidFailed,\n ...webPhase.failedContents,\n ...redditPhase.failedContents,\n ...documentPhase.failedContents,\n ...deferredPhase.failedContents,\n ];\n const metrics: ScrapeMetrics = {\n successful:\n webPhase.metrics.successful\n + redditPhase.metrics.successful\n + documentPhase.metrics.successful\n + deferredPhase.metrics.successful,\n failed:\n invalidEntries.length\n + webPhase.metrics.failed\n + redditPhase.metrics.failed\n + documentPhase.metrics.failed\n + deferredPhase.metrics.failed,\n totalCredits: webPhase.metrics.totalCredits,\n };\n\n await reporter.log('info', `Fetched ${metrics.successful} page(s), ${metrics.failed} failed`);\n\n if (successItems.length > 0) {\n await reporter.progress(80, 100, 'Running LLM extraction over fetched pages');\n }\n\n const { items: processedItems, llmErrors, llmAttempted } = await processItemsWithLlm(\n successItems,\n enhancedInstruction,\n clients.llmProcessor,\n reporter,\n );\n\n const contents = assembleContentEntries(processedItems, failedContents);\n const executionTime = Date.now() - startTime;\n\n mcpLog(\n 'info',\n `Completed: ${metrics.successful} successful, ${metrics.failed} failed, ${metrics.totalCredits} credits used`,\n 'scrape',\n );\n\n const llmSucceeded = llmAttempted > 0 && llmErrors < llmAttempted;\n const result = buildScrapeResponse(\n params,\n contents,\n metrics,\n llmErrors,\n executionTime,\n { llmAttempted, llmSucceeded },\n );\n\n if (metrics.successful === 0 && metrics.failed > 0) {\n return toolFailure(result.content);\n }\n\n return toolSuccess(result.content, result.structuredContent);\n}\n\nexport function registerScrapeLinksTool(server: MCPServer): void {\n server.tool(\n {\n name: 'scrape-links',\n title: 'Scrape Links',\n description:\n 'Fetch many URLs in parallel. 
With `extract` set, run per-URL structured LLM extraction (each page returns `## Source`, `## Matches` verbatim facts, `## Not found` gaps, `## Follow-up signals` new terms + referenced URLs); omit `extract` for raw mode (cleaned markdown per URL, no LLM pass). Auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post + comments); PDF/DOCX/PPTX/XLSX \u2192 Jina Reader; everything else \u2192 HTTP scraper. Safe to call in parallel \u2014 group URLs by context rather than jamming unrelated batches together. Describe the SHAPE of what you want in `extract`, facets separated by `|` (e.g. `root cause | affected versions | fix | workarounds | timeline`).',\n schema: scrapeLinksParamsSchema,\n outputSchema: scrapeLinksOutputSchema,\n annotations: {\n readOnlyHint: true,\n idempotentHint: true,\n destructiveHint: false,\n openWorldHint: true,\n },\n },\n async (args, ctx) => {\n if (!getCapabilities().scraping) {\n return toToolResponse(toolFailure(getMissingEnvMessage('scraping')));\n }\n\n const reporter = createToolReporter(ctx, 'scrape-links');\n const result = await handleScrapeLinks(args, reporter);\n\n await reporter.progress(100, 100, result.isError ? 'Scrape failed' : 'Scrape complete');\n return toToolResponse(result);\n },\n );\n}\n"],
+
"mappings": "AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AACP,SAAS,qBAAqB;AAC9B,SAAS,oBAAqC;AAC9C,SAAS,kBAAkB;AAC3B,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,6BAA6B;AAC1D,SAAS,sBAAsB;AAC/B,SAAS,8BAA8B;AACvC,SAAS,eAAe,iBAAiB;AACzC,SAAS,qBAAqB;AAC9B,SAAS,MAAM,mBAAmB;AAClC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAGK;AAEP,MAAM,kBAAkB,IAAI,gBAAgB;AAE5C,SAAS,6BAA6B,aAAyC;AAC7E,QAAM,OAAO,eAAe;AAC5B,SAAO,GAAG,QAAQ,iBAAiB;AAAA;AAAA,EAAO,IAAI;AAAA;AAAA,EAAO,QAAQ,iBAAiB;AAChF;AA4DA,MAAM,cAAc;AACpB,MAAM,wBAAwB;AAE9B,SAAS,YAAY,KAAsB;AACzC,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ;AAAA,EACpC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,sBAAsB,KAAsB;AACnD,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,WAAO,YAAY,KAAK,EAAE,QAAQ,KAAK,sBAAsB,KAAK,EAAE,QAAQ;AAAA,EAC9E,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAIA,SAAS,0BACP,MACA,SACA,WACA,YAAY,OACZ,cACwC;AACxC,SAAO;AAAA,IACL,GAAG,YAAY;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,MACV,UAAU,SAAS,YAAY,CAAC,gCAAgC,IAAI;AAAA,MACpE;AAAA,IACF,CAAC,CAAC;AAAA;AAAA,kBAAuB,eAAe,KAAK,IAAI,IAAI,SAAS,CAAC;AAAA,EACjE;AACF;AAWA,SAAS,cAAc,MAAiC;AACtD,QAAM,YAA2B,CAAC;AAClC,QAAM,eAA8B,CAAC;AACrC,QAAM,iBAAgC,CAAC;AACvC,QAAM,iBAAuD,CAAC;AAE9D,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,MAAM,KAAK,CAAC;AAClB,QAAI;AACF,UAAI,IAAI,GAAG;AAAA,IACb,QAAQ;AACN,qBAAe,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AACzC;AAAA,IACF;AAKA,QAAI,cAAc,GAAG,GAAG;AACtB,qBAAe,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IAC3C,WAAW,YAAY,GAAG,GAAG;AAC3B,mBAAa,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACzC,OAAO;AACL,gBAAU,KAAK,EAAE,KAAK,WAAW,EAAE,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,cAAc,gBAAgB,eAAe;AACnE;AAIA,eAAe,eACb,QACA,QACyB;AACzB,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAgB,CAAC;AAAA,MACjB,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE;AAAA,MACrD,eAAe,CAAC;AAAA,IAClB;AAAA,EACF;AAEA,SAAO,QAAQ,yCAAyC,OAAO,MAAM,sBAAsB,YAAY,OAAO,IAAI,QAAQ;AAC1H,QAAM,OAAO,OAAO,IAAI,CAAC,MAAM,EAAE,GAAG;AACpC,QAAM,UAAU,MAAM,OAAO,eAAe,MAAM,EAAE,SAAS,GAAG,CAAC;AACjE,QAAM,aAAa,IAAI,IAAI,OAAO,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAElE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,QAAM,gBAAgC,CAAC;AACvC,MAAI,aAAa;AACjB,MAAI,SAAS;AACb,MAAI,eAAe;AAEnB,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,YAAY,OAAO,CAAC,EAAG;AAC7B,QAAI,CAAC,QAAQ;AACX;AACA,qBAAe,KAAK,MAAM,OAAO,CAAC,EAAG,GAAG;AAAA;AAAA,0BAA0B;AAClE;AAAA,IACF;AAGA,QAAI,OAAO,OAAO,SAAS,UAAU,4BAA4B;AAC/D,oBAAc,KAAK;AAAA,QACjB,KAAK,OAAO;AAAA,QACZ,WAAW,WAAW,IAAI,OAAO,GAAG,KAAK;AAAA,QACzC,QAAQ;AAAA,MACV,CAAC;AACD;AAAA,IACF;AAOA,UAAM,eAAe,QAAQ,OAAO,KAAK,KAAK,OAAO,aAAa,OAAO,OAAO,cAAc;AAC9F,QAAI,gBAAgB,OAAO,eAAe,KAAK;AAC7C,oBAAc,KAAK;AAAA,QACjB,KAAK,OAAO;AAAA,QACZ,WAAW,WAAW,IAAI,OAAO,GAAG,KAAK;AAAA,QACzC,QAAQ;AAAA,QACR,aAAa,OAAO,OAAO,WAAW,OAAO,WAAW,QAAQ,OAAO,UAAU;AAAA,MACnF,CAAC;AACD;AAAA,IACF;AACA,QAAI,cAAc;AAChB;AACA,qBAAe,KAAK,MAAM,OAAO,GAAG;AAAA;AAAA,wDAAmD;AACvF;AAAA,IACF;AAEA;AACA,oBAAgB,OAAO;AAEvB,QAAI;AACJ,QAAI;AACF,YAAM,WAAW,uBAAuB,OAAO,SAAS,OAAO,GAAG;AAClE,YAAM,mBAAmB,SAAS,YAAY,SAAS,UAAU,OAAO;AACxE,gBAAU,gBAAgB,eAAe,gBAAgB;AAAA,IAC3D,QAAQ;AACN,gBAAU,OAAO;AAAA,IACnB;AAEA,iBAAa,KAAK,EAAE,KAAK,OAAO,KAAK,SAAS,OAAO,WAAW,YAAY,QAAQ,CAAC;AAAA,EACvF;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,SAAS,EAAE,YAAY,QAAQ,aAAa;AAAA,IAC5C;AAAA,EACF;AACF;AAWO,SAAS,kBAAkB,KAAa,WAAmB,aAA8B;AAC9F,MAAI,aAAa;AACf,WAAO,MAAM,GAAG;AAAA;AAAA,0CAA0C,WAAW,kBAAkB,SAAS;AAAA,EAClG;AACA,SAAO,MAAM,GAAG;AAAA;AAAA,qCAAqC,SAAS;AAChE;AAEA,eAAe,oBACb,QACA,Y
AEA,oBAC4B;AAC5B,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA;AAAA,IACE;AAAA,IACA,oDAAoD,OAAO,MAAM,sBAAsB,YAAY,OAAO;AAAA,IAC1G;AAAA,EACF;AAEA,QAAM,UAAU,MAAM;AAAA,IACpB;AAAA,IACA,CAAC,UAAU,WAAW,QAAQ,EAAE,KAAK,MAAM,IAAI,CAAC;AAAA,IAChD,YAAY;AAAA,EACd;AAEA,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC;AAClC,MAAI,aAAa;AACjB,MAAI,SAAS;AAEb,WAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,UAAM,UAAU,QAAQ,CAAC;AACzB,UAAM,QAAQ,OAAO,CAAC;AACtB,UAAM,cAAc,oBAAoB,IAAI,MAAM,GAAG;AACrD,QAAI,CAAC,SAAS;AACZ;AACA,qBAAe,KAAK,kBAAkB,MAAM,KAAK,sBAAsB,WAAW,CAAC;AACnF;AAAA,IACF;AACA,QAAI,QAAQ,WAAW,YAAY;AACjC;AACA,YAAM,SAAS,QAAQ,kBAAkB,QAAQ,QAAQ,OAAO,UAAU,OAAO,QAAQ,MAAM;AAC/F,qBAAe,KAAK,kBAAkB,MAAM,KAAK,QAAQ,WAAW,CAAC;AACrE;AAAA,IACF;AAEA,UAAM,SAAS,QAAQ;AACvB,QAAI,OAAO,SAAS,OAAO,aAAa,OAAO,OAAO,cAAc,KAAK;AACvE;AACA,YAAM,WAAW,OAAO,OAAO,WAAW,QAAQ,OAAO,UAAU;AACnE,qBAAe,KAAK,kBAAkB,MAAM,KAAK,UAAU,WAAW,CAAC;AACvE;AAAA,IACF;AAEA;AACA,iBAAa,KAAK,EAAE,KAAK,MAAM,KAAK,SAAS,OAAO,SAAS,OAAO,MAAM,WAAW,YAAY,OAAO,QAAQ,CAAC;AAAA,EACnH;AAEA,SAAO,EAAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,cAAc,EAAE,EAAE;AAC1F;AAIA,SAAS,2BAA2B,QAA4B;AAC9D,QAAM,EAAE,MAAM,SAAS,IAAI;AAC3B,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,KAAK,KAAK,EAAE;AAC5B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,OAAO,KAAK,SAAS,eAAU,KAAK,MAAM,wBAAS,KAAK,KAAK,qBAAS,KAAK,YAAY,WAAW;AAC7G,QAAM,KAAK,aAAM,KAAK,GAAG,EAAE;AAC3B,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,MAAM;AACb,UAAM,KAAK,iBAAiB;AAC5B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,KAAK,IAAI;AACpB,UAAM,KAAK,EAAE;AAAA,EACf;AACA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,oBAAoB,SAAS,MAAM,SAAS;AACvD,UAAM,KAAK,EAAE;AACb,eAAW,KAAK,UAAU;AACxB,YAAM,SAAS,KAAK,OAAO,EAAE,KAAK;AAClC,YAAM,KAAK,EAAE,OAAO,cAAc;AAClC,YAAM,QAAQ,EAAE,SAAS,IAAI,IAAI,EAAE,KAAK,KAAK,GAAG,EAAE,KAAK;AACvD,YAAM,KAAK,GAAG,MAAM,SAAS,EAAE,MAAM,KAAK,EAAE,MAAM,KAAK,IAAI;AAC3D,iBAAW,QAAQ,EAAE,KAAK,MAAM,IAAI,GAAG;AACrC,cAAM,KAAK,GAAG,MAAM,KAAK,IAAI,EAAE;AAAA,MACjC;AACA,YAAM,KAAK,EAAE;AAAA,IACf;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,eAAe,kBAAkB,QAAmD;AAClF,MAAI,OAAO,WAAW,GAAG;AACvB,WAAO,EAAE,cAAc,CAAC,GAAG,gBAAgB,CAAC,GAAG,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE,EAAE;AAAA,EACxG;AAEA,QAAM,MAAM,SAAS;AACrB,MAAI,CAAC,IAAI,oBAAoB,CAAC,IAAI,sBAAsB;AACtD,UAAMA,kBAAiB,OAAO;AAAA,MAC5B,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,IACpB;AACA,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAAA;AAAA,MACA,SAAS,EAAE,YAAY,GAAG,QAAQ,OAAO,QAAQ,cAAc,EAAE;AAAA,IACnE;AAAA,EACF;AAKA,QAAM,CAAC,YAAY,aAAa,IAAI,OAAO;AAAA,IACzC,CAAC,CAAC,OAAO,IAAI,GAAG,UAAU;AACxB,UAAI,sBAAsB,MAAM,GAAG,EAAG,OAAM,KAAK,KAAK;AAAA,UACjD,MAAK,KAAK,KAAK;AACpB,aAAO,CAAC,OAAO,IAAI;AAAA,IACrB;AAAA,IACA,CAAC,CAAC,GAAG,CAAC,CAAC;AAAA,EACT;AAEA,QAAM,qBAAqB,cAAc;AAAA,IACvC,CAAC,MAAM,MAAM,EAAE,GAAG;AAAA;AAAA;AAAA,EACpB;AAEA,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO;AAAA,MACL,cAAc,CAAC;AAAA,MACf,gBAAgB;AAAA,MAChB,SAAS,EAAE,YAAY,GAAG,QAAQ,cAAc,QAAQ,cAAc,EAAE;AAAA,IAC1E;AAAA,EACF;AAEA,SAAO,QAAQ,yCAAyC,WAAW,MAAM,uBAAuB,YAAY,MAAM,IAAI,QAAQ;AAC9H,QAAM,SAAS,IAAI,aAAa,IAAI,kBAAkB,IAAI,oBAAoB;AAC9E,QAAM,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,GAAG;AACxC,QAAM,cAAc,MAAM,OAAO,cAAc,MAAM,IAAI;AACzD,QAAM,aAAa,IAAI,IAAI,WAAW,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AAEtE,QAAM,eAAkC,CAAC;AACzC,QAAM,iBAA2B,CAAC,GAAG,kBAAkB;AACvD,MAAI,aAAa;AACjB,MAAI,SAAS,cAAc;AAE3B,aAAW,CAAC,KAAK,MAAM,KAAK,YAAY,SAAS;AAC/C,UAAM,YAAY,WAAW,IAAI,GAAG,KAAK;AACzC,QAAI,kBAAkB,OAAO;AAC3B;AACA,qBAAe,KAAK,MAAM,GAAG;AAAA;AAAA,8BAA8B,OAAO,OAAO,EAAE;AAC3E;AAAA,IACF;AACA;AACA,UAAM,KAAK,2BAA2B,MAAM;AAC5C,iBAAa,KAAK,EAAE,KAAK,SAAS,IAAI,OAAO,WAAW,YAAY,GAAG,CAAC;AAAA,EAC1E;AAEA,SAAO,E
AAE,cAAc,gBAAgB,SAAS,EAAE,YAAY,QAAQ,cAAc,EAAE,EAAE;AAC1F;AAYA,MAAM,uBACJ;AAGK,MAAM,wBAAwB;AAM9B,SAAS,mBAAmB,WAAkC;AACnE,QAAM,IAAI,UAAU,KAAK,EAAE,MAAM,oBAAoB;AACrD,SAAO,IAAI,EAAE,CAAC,IAAK;AACrB;AAOO,SAAS,wBACd,WACA,YACQ;AACR,QAAM,SAAS,mBAAmB,SAAS;AAC3C,MAAI,CAAC,OAAQ,QAAO;AACpB,QAAM,UAAU,YAAY,KAAK;AACjC,MAAI,CAAC,QAAS,QAAO;AACrB,QAAM,UACJ,QAAQ,SAAS,wBACb,QAAQ,MAAM,GAAG,qBAAqB,IAAI,8BAC1C;AACN,SAAO,GAAG,UAAU,KAAK,CAAC;AAAA;AAAA,sCAA2C,MAAM;AAAA;AAAA,EAAQ,OAAO;AAC5F;AAIA,eAAe,oBACb,cACA,qBACA,cACA,UACgF;AAChF,MAAI,YAAY;AAGhB,MAAI,CAAC,qBAAqB;AACxB,QAAI,aAAa,SAAS,GAAG;AAC3B,aAAO,QAAQ,uFAAkF,QAAQ;AAAA,IAC3G;AACA,WAAO,EAAE,OAAO,cAAc,WAAW,cAAc,EAAE;AAAA,EAC3D;AAEA,MAAI,CAAC,gBAAgB,aAAa,WAAW,GAAG;AAC9C,QAAI,CAAC,gBAAgB,aAAa,SAAS,GAAG;AAC5C,aAAO,WAAW,yEAAyE,QAAQ;AACnG,WAAK,SAAS,IAAI,WAAW,iFAAiF;AAAA,IAChH;AACA,WAAO,EAAE,OAAO,cAAc,WAAW,cAAc,EAAE;AAAA,EAC3D;AAEA,SAAO,QAAQ,6CAA6C,aAAa,MAAM,uBAAuB,YAAY,cAAc,IAAI,QAAQ;AAE5I,QAAM,aAAa,MAAM;AAAA,IACvB;AAAA,IACA,OAAO,SAAS;AACd,aAAO,SAAS,kBAAkB,KAAK,GAAG,OAAO,QAAQ;AAEzD,YAAM,YAAY,MAAM;AAAA,QACtB,KAAK;AAAA,QACL,EAAE,SAAS,MAAM,SAAS,qBAAqB,KAAK,KAAK,IAAI;AAAA,QAC7D;AAAA,MACF;AAEA,UAAI,UAAU,WAAW;AACvB,cAAM,SAAS,wBAAwB,UAAU,SAAS,KAAK,UAAU;AACzE,YAAI,WAAW,UAAU,SAAS;AAChC,iBAAO,WAAW,qCAAqC,KAAK,GAAG,kCAA6B,QAAQ;AACpG,eAAK,SAAS,IAAI,WAAW,qBAAqB,KAAK,GAAG,iCAA4B;AAAA,QACxF;AACA,eAAO,EAAE,GAAG,MAAM,SAAS,OAAO;AAAA,MACpC;AAEA;AACA,aAAO,WAAW,6BAA6B,KAAK,GAAG,KAAK,UAAU,SAAS,gBAAgB,IAAI,QAAQ;AAC3G,WAAK,SAAS,IAAI,WAAW,8BAA8B,KAAK,GAAG,WAAM,UAAU,SAAS,gBAAgB,EAAE;AAC9G,aAAO;AAAA,IACT;AAAA,IACA,YAAY;AAAA,EACd;AAEA,SAAO,EAAE,OAAO,YAAY,WAAW,cAAc,aAAa,OAAO;AAC3E;AAIA,SAAS,uBAAuB,cAAiC,gBAAoC;AACnG,QAAM,SAAS,CAAC,GAAG,YAAY,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AACjE,QAAM,WAAW,CAAC,GAAG,cAAc;AACnC,aAAW,QAAQ,QAAQ;AACzB,QAAI,UAAU,KAAK;AACnB,QAAI;AACF,gBAAU,eAAe,OAAO;AAAA,IAClC,QAAQ;AAAA,IAER;AACA,aAAS,KAAK,MAAM,KAAK,GAAG;AAAA;AAAA,EAAO,OAAO,EAAE;AAAA,EAC9C;AACA,SAAO;AACT;AAEA,SAAS,oBACP,QACA,UACA,SACA,WACA,eACA,eAC2D;AAC3D,QAAM,YAA6C,CAAC;AACpD,MAAI,cAAc,eAAe,GAAG;AAClC,UAAM,KAAK,cAAc,eAAe;AACxC,cAAU,gBAAgB,IAAI,GAAG,EAAE,IAAI,cAAc,YAAY;AACjE,QAAI,CAAC,cAAc,cAAc;AAC/B,gBAAU,YAAY,IAAI;AAAA,IAC5B;AAAA,EACF,WAAW,YAAY,GAAG;AACxB,cAAU,yBAAyB,IAAI;AAAA,EACzC;AAEA,QAAM,cAAc,kBAAkB;AAAA,IACpC,OAAO,oBAAoB,OAAO,KAAK,MAAM;AAAA,IAC7C,YAAY,OAAO,KAAK;AAAA,IACxB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,QAAQ;AAAA,MACN,gBAAgB,QAAQ;AAAA,MACxB,GAAG;AAAA,IACL;AAAA,EACF,CAAC;AAED,QAAM,mBAAmB,cAAc;AAAA,IACrC,OAAO;AAAA,IACP,SAAS;AAAA,IACT,MAAM,SAAS,KAAK,aAAa;AAAA,IACjC,UAAU;AAAA,MACR,kBAAkB,eAAe,aAAa;AAAA,IAChD;AAAA,EACF,CAAC;AAED,QAAM,WAA0C;AAAA,IAC9C,aAAa,OAAO,KAAK;AAAA,IACzB,YAAY,QAAQ;AAAA,IACpB,QAAQ,QAAQ;AAAA,IAChB,mBAAmB;AAAA,IACnB,eAAe,QAAQ;AAAA,EACzB;AACA,SAAO,EAAE,SAAS,kBAAkB,mBAAmB,EAAE,SAAS,EAAE;AACtE;AAIA,eAAsB,kBACpB,QACA,WAAyB,eACwB;AACjD,QAAM,YAAY,KAAK,IAAI;AAE3B,MAAI,CAAC,OAAO,QAAQ,OAAO,KAAK,WAAW,GAAG;AAC5C,WAAO,0BAA0B,WAAW,oBAAoB,SAAS;AAAA,EAC3E;AAEA,QAAM,EAAE,WAAW,cAAc,gBAAgB,eAAe,IAAI,cAAc,OAAO,IAAI;AAC7F,QAAM,aAAa,UAAU,SAAS,aAAa,SAAS,eAAe;AAE3E,QAAM,SAAS;AAAA,IACb;AAAA,IACA,eAAe,OAAO,KAAK,MAAM,YAAY,UAAU,MAAM,SAAS,aAAa,MAAM,YAAY,eAAe,MAAM,cAAc,eAAe,MAAM;AAAA,EAC/J;AAEA,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,MACL;AAAA,MACA,OAAO,OAAO,KAAK,MAAM;AAAA,MACzB;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA;AAAA,IACE;AAAA,IACA,oBAAoB,UAAU,MAAM,UAAU,aAAa,MAAM,aAAa,eAAe,MAAM;AAAA,IACnG;AAAA,EACF;AACA,QAAM,SAAS,SAAS,IAAI,KAAK,2BAA2B;AAK5D,MAAI,UAAgC;AACpC,MAAI;AACF,UAAM,aAAa,IAAI,WAAW;AAClC,QAAI,UAAU,SAAS,GAAG;AACxB,gBAAU;AAAA,QACR,QAAQ,IAAI,cAAc;AAAA,QAC1B;AAAA,QACA,cAAc,mBAAmB;AA
AA,MACnC;AAAA,IACF,OAAO;AACL,gBAAU;AAAA,QACR,QAAQ;AAAA,QACR;AAAA,QACA,cAAc,mBAAmB;AAAA,MACnC;AAAA,IACF;AAAA,EACF,SAAS,OAAO;AACd,UAAM,MAAM,cAAc,KAAK;AAC/B,WAAO;AAAA,MACL;AAAA,MACA,iCAAiC,IAAI,OAAO;AAAA,MAC5C;AAAA,MACA;AAAA,MACA;AAAA,QACE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAIA,QAAM,sBAAsB,OAAO,UAC/B,6BAA6B,OAAO,OAAO,IAC3C;AAEJ,QAAM,SAAS,SAAS,IAAI,KAAK,uBAAuB;AAMxD,QAAM,aAA6B;AAAA,IACjC,cAAc,CAAC;AAAA,IAAG,gBAAgB,CAAC;AAAA,IACnC,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE;AAAA,IACrD,eAAe,CAAC;AAAA,EAClB;AACA,QAAM,CAAC,UAAU,aAAa,aAAa,IAAI,MAAM,QAAQ,IAAI;AAAA,IAC/D,UAAU,SAAS,IACf,eAAe,WAAW,QAAQ,MAAM,IACxC,QAAQ,QAAwB,UAAU;AAAA,IAC9C,kBAAkB,YAAY;AAAA,IAC9B,oBAAoB,gBAAgB,QAAQ,UAAU;AAAA,EACxD,CAAC;AAID,MAAI,gBAAmC;AAAA,IACrC,cAAc,CAAC;AAAA,IAAG,gBAAgB,CAAC;AAAA,IACnC,SAAS,EAAE,YAAY,GAAG,QAAQ,GAAG,cAAc,EAAE;AAAA,EACvD;AACA,MAAI,SAAS,cAAc,SAAS,GAAG;AACrC,UAAM,cAAc,SAAS,cAAc,OAAO,CAAC,MAAM,EAAE,WAAW,gBAAgB,EAAE;AACxF,UAAM,cAAc,SAAS,cAAc,SAAS;AACpD,UAAM,SAAS;AAAA,MACb;AAAA,MACA,aAAa,SAAS,cAAc,MAAM,2BAA2B,WAAW,YAAY,WAAW;AAAA,IACzG;AACA,UAAM,iBAAgC,SAAS,cAAc,IAAI,CAAC,OAAO;AAAA,MACvE,KAAK,EAAE;AAAA,MACP,WAAW,EAAE;AAAA,IACf,EAAE;AACF,UAAM,eAAe,IAAI;AAAA,MACvB,SAAS,cACN,OAAO,CAAC,MAAM,EAAE,gBAAgB,MAAS,EACzC,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,WAAqB,CAAC;AAAA,IAChD;AACA,oBAAgB,MAAM,oBAAoB,gBAAgB,QAAQ,YAAY,YAAY;AAAA,EAC5F;AAEA,QAAM,eAAe;AAAA,IACnB,GAAG,SAAS;AAAA,IACZ,GAAG,YAAY;AAAA,IACf,GAAG,cAAc;AAAA,IACjB,GAAG,cAAc;AAAA,EACnB;AACA,QAAM,gBAAgB,eAAe;AAAA,IACnC,CAAC,EAAE,IAAI,MAAM,MAAM,GAAG;AAAA;AAAA;AAAA,EACxB;AACA,QAAM,iBAAiB;AAAA,IACrB,GAAG;AAAA,IACH,GAAG,SAAS;AAAA,IACZ,GAAG,YAAY;AAAA,IACf,GAAG,cAAc;AAAA,IACjB,GAAG,cAAc;AAAA,EACnB;AACA,QAAM,UAAyB;AAAA,IAC7B,YACE,SAAS,QAAQ,aACf,YAAY,QAAQ,aACpB,cAAc,QAAQ,aACtB,cAAc,QAAQ;AAAA,IAC1B,QACE,eAAe,SACb,SAAS,QAAQ,SACjB,YAAY,QAAQ,SACpB,cAAc,QAAQ,SACtB,cAAc,QAAQ;AAAA,IAC1B,cAAc,SAAS,QAAQ;AAAA,EACjC;AAEA,QAAM,SAAS,IAAI,QAAQ,WAAW,QAAQ,UAAU,aAAa,QAAQ,MAAM,SAAS;AAE5F,MAAI,aAAa,SAAS,GAAG;AAC3B,UAAM,SAAS,SAAS,IAAI,KAAK,2CAA2C;AAAA,EAC9E;AAEA,QAAM,EAAE,OAAO,gBAAgB,WAAW,aAAa,IAAI,MAAM;AAAA,IAC/D;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,EACF;AAEA,QAAM,WAAW,uBAAuB,gBAAgB,cAAc;AACtE,QAAM,gBAAgB,KAAK,IAAI,IAAI;AAEnC;AAAA,IACE;AAAA,IACA,cAAc,QAAQ,UAAU,gBAAgB,QAAQ,MAAM,YAAY,QAAQ,YAAY;AAAA,IAC9F;AAAA,EACF;AAEA,QAAM,eAAe,eAAe,KAAK,YAAY;AACrD,QAAM,SAAS;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,EAAE,cAAc,aAAa;AAAA,EAC/B;AAEA,MAAI,QAAQ,eAAe,KAAK,QAAQ,SAAS,GAAG;AAClD,WAAO,YAAY,OAAO,OAAO;AAAA,EACnC;AAEA,SAAO,YAAY,OAAO,SAAS,OAAO,iBAAiB;AAC7D;AAEO,SAAS,wBAAwB,QAAyB;AAC/D,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,OAAO;AAAA,MACP,aACE;AAAA,MACF,QAAQ;AAAA,MACR,cAAc;AAAA,MACd,aAAa;AAAA,QACX,cAAc;AAAA,QACd,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,eAAe;AAAA,MACjB;AAAA,IACF;AAAA,IACA,OAAO,MAAM,QAAQ;AACnB,UAAI,CAAC,gBAAgB,EAAE,UAAU;AAC/B,eAAO,eAAe,YAAY,qBAAqB,UAAU,CAAC,CAAC;AAAA,MACrE;AAEA,YAAM,WAAW,mBAAmB,KAAK,cAAc;AACvD,YAAM,SAAS,MAAM,kBAAkB,MAAM,QAAQ;AAErD,YAAM,SAAS,SAAS,KAAK,KAAK,OAAO,UAAU,kBAAkB,iBAAiB;AACtF,aAAO,eAAe,MAAM;AAAA,IAC9B;AAAA,EACF;AACF;",
"names": ["failedContents"]
}
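The sourcesContent above documents a guard against the LLM's terse escape line: when extraction returns only "## Matches" plus "_Page did not load: X_", the handler keeps that verdict but re-attaches a capped slice of the cleaned markdown so the scraped body is never discarded. A standalone restatement of that guard in TypeScript, with the regex, cap, and merge format copied from the embedded src/tools/scrape.ts source:

// Restated from the embedded sourcesContent above; names match the source.
const TERSE_LLM_FAILURE_RE =
  /^\s*##\s*Matches\s*\n+\s*_Page did not load:\s*([a-z0-9_-]+)_\s*\.?\s*$/i;

// Cap on the raw-markdown slice appended under "## Raw content (...)".
export const RAW_FALLBACK_CHAR_CAP = 4000;

// Returns the reason token (e.g. "login-wall") when llmOutput is exactly
// the terse escape line; null otherwise.
export function detectTerseFailure(llmOutput: string): string | null {
  const m = llmOutput.trim().match(TERSE_LLM_FAILURE_RE);
  return m ? m[1]! : null;
}

// No-op unless the terse escape fired and raw markdown exists; otherwise
// appends a capped slice of the raw content under a new heading so the
// caller always has the scraped body to inspect.
export function mergeLlmWithRawFallback(llmOutput: string, rawContent?: string): string {
  const reason = detectTerseFailure(llmOutput);
  if (!reason) return llmOutput;
  const trimmed = rawContent?.trim();
  if (!trimmed) return llmOutput;
  const snippet =
    trimmed.length > RAW_FALLBACK_CHAR_CAP
      ? trimmed.slice(0, RAW_FALLBACK_CHAR_CAP) + '\n\n…[raw truncated]'
      : trimmed;
  return `${llmOutput.trim()}\n\n## Raw content (LLM flagged page as ${reason})\n\n${snippet}`;
}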
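The same source routes each incoming URL through a fixed partition order: document URLs (.pdf/.docx/.pptx/.xlsx) are checked first so they reach Jina Reader even on a reddit-adjacent host, Reddit hostnames go to the Reddit API branch, everything else goes to the HTTP scraper, and unparseable URLs are reported as invalid. A minimal sketch of that routing; the two Reddit regexes are copied from the source, while isDocumentUrl is an assumed extension test standing in for src/utils/source-type.ts, which this diff does not show:

// Reddit detection regexes copied from the embedded source.
const REDDIT_HOST = /(?:^|\.)reddit\.com$/i;
const REDDIT_POST_PERMALINK = /\/r\/[^/]+\/comments\/[a-z0-9]+/i;

// Assumption: stand-in for the package's isDocumentUrl helper.
const isDocumentUrl = (url: string): boolean =>
  /\.(pdf|docx|pptx|xlsx)(\?|#|$)/i.test(url);

type Branch = 'document' | 'reddit' | 'web' | 'invalid';

function routeUrl(url: string): Branch {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return 'invalid'; // surfaced as "Invalid URL format" in the tool output
  }
  // Document check runs before the Reddit check, per the source's ordering
  // comment, so a hypothetical PDF on a reddit host still takes this path.
  if (isDocumentUrl(url)) return 'document';
  if (REDDIT_HOST.test(parsed.hostname)) return 'reddit';
  return 'web';
}

// Post permalinks are validated later, inside the Reddit branch, via
// REDDIT_POST_PERMALINK.test(new URL(url).pathname); non-permalink Reddit
// URLs fail there with a pointer to web-search scope:"reddit".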
package/package.json
CHANGED
@@ -1,6 +1,6 @@
{
"name": "mcp-researchpowerpack",
- "version": "6.0.7",
+ "version": "6.0.9",
"description": "HTTP-first MCP research server: start-research (goal-tailored brief), web-search (with Reddit scope), scrape-links (auto-detects Reddit URLs) — built on mcp-use.",
"type": "module",
"main": "dist/index.js",
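Given the handler above, the two calling modes it distinguishes look roughly like the following sketch. The Client import path and callTool shape follow the generic MCP TypeScript SDK and are assumptions about the consumer, not part of this package:

import { Client } from '@modelcontextprotocol/sdk/client/index.js';

// Sketch only: assumes an already-connected MCP client.
async function demo(client: Client): Promise<void> {
  // Extraction mode: per-URL structured LLM extraction, shaped by `extract`
  // (facets separated by `|`, as the tool description recommends).
  await client.callTool({
    name: 'scrape-links',
    arguments: {
      urls: ['https://example.com/security/advisory'],
      extract: 'root cause | affected versions | fix | workarounds | timeline',
    },
  });

  // Raw mode: omit `extract` to skip the LLM pass and receive cleaned
  // markdown per URL (cheaper and faster, per the schema description).
  await client.callTool({
    name: 'scrape-links',
    arguments: { urls: ['https://example.com/changelog'] },
  });
}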