mcp-researchpowerpack 6.0.8 → 6.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-use.json +2 -2
- package/dist/src/schemas/scrape-links.js +2 -2
- package/dist/src/schemas/scrape-links.js.map +2 -2
- package/dist/src/schemas/web-search.js +12 -1
- package/dist/src/schemas/web-search.js.map +2 -2
- package/dist/src/services/llm-processor.js +3 -7
- package/dist/src/services/llm-processor.js.map +2 -2
- package/dist/src/tools/scrape.js +39 -6
- package/dist/src/tools/scrape.js.map +2 -2
- package/dist/src/tools/search.js +84 -4
- package/dist/src/tools/search.js.map +2 -2
- package/dist/src/utils/query-relax.js +91 -0
- package/dist/src/utils/query-relax.js.map +7 -0
- package/package.json +1 -1
package/dist/mcp-use.json
CHANGED
|
@@ -5,8 +5,8 @@ const urlSchema = z.string().url({ message: "scrape-links: Invalid URL format" }
|
|
|
5
5
|
).describe("A fully-qualified HTTP or HTTPS URL to scrape.");
|
|
6
6
|
const scrapeLinksParamsSchema = z.object({
|
|
7
7
|
urls: z.array(urlSchema).min(1, { message: "scrape-links: At least 1 URL required" }).describe("URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch."),
|
|
8
|
-
extract: z.string().min(1, { message: "scrape-links: extract cannot be empty" }).describe(
|
|
9
|
-
'
|
|
8
|
+
extract: z.string().min(1, { message: "scrape-links: extract cannot be empty" }).optional().describe(
|
|
9
|
+
'OPTIONAL semantic extraction instruction. Describe the SHAPE of what you want, separated by `|`. When provided, the extractor classifies each page (docs / github-thread / reddit / marketing / cve / paper / announcement / qa / blog / changelog / release-notes) and adjusts emphasis per type: preserves numbers/versions/stacktraces verbatim from docs and CVE pages, quotes Reddit/HN with attribution plus sentiment distribution, flags what the page did NOT answer in a "Not found" section, and surfaces referenced-but-unscraped URLs in a "Follow-up signals" bulletin that feeds the next research loop. Good examples: "root cause | affected versions | fix | workarounds | timeline"; "pricing tiers | rate limits | enterprise contact | free-tier quotas"; "maintainer decisions | accepted fix commits | stacktraces | resolved version". Omit this argument to skip LLM extraction entirely and receive cleaned markdown for each URL (raw mode \u2014 cheaper, faster, and useful when you want the whole page rather than a filtered view).'
|
|
10
10
|
)
|
|
11
11
|
}).strict();
|
|
12
12
|
const scrapeLinksOutputSchema = z.object({
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/scrape-links.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .describe('URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch.'),\n extract: z\n .string()\n .min(1, { message: 'scrape-links: extract cannot be empty' })\n .describe(\n '
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,icAA4b;AAAA,EACxc,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA,EAI9C,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\nconst urlSchema = z\n .string()\n .url({ message: 'scrape-links: Invalid URL format' })\n .refine(\n url => url.startsWith('http://') || url.startsWith('https://'),\n { message: 'scrape-links: URL must use http:// or https://' }\n )\n .describe('A fully-qualified HTTP or HTTPS URL to scrape.');\n\nexport const scrapeLinksParamsSchema = z.object({\n urls: z\n .array(urlSchema)\n .min(1, { message: 'scrape-links: At least 1 URL required' })\n .describe('URLs to fetch and extract in parallel. Reddit post permalinks (`reddit.com/r/<sub>/comments/<id>/...`) are auto-detected and routed through the Reddit API (threaded post + comments); every other URL flows through the HTTP scraper. Mix reddit + non-reddit URLs freely; both branches run concurrently. Prefer contextually grouped batches \u2014 call this tool multiple times in parallel when URL sets are unrelated, instead of one giant mixed batch.'),\n extract: z\n .string()\n .min(1, { message: 'scrape-links: extract cannot be empty' })\n .optional()\n .describe(\n 'OPTIONAL semantic extraction instruction. Describe the SHAPE of what you want, separated by `|`. When provided, the extractor classifies each page (docs / github-thread / reddit / marketing / cve / paper / announcement / qa / blog / changelog / release-notes) and adjusts emphasis per type: preserves numbers/versions/stacktraces verbatim from docs and CVE pages, quotes Reddit/HN with attribution plus sentiment distribution, flags what the page did NOT answer in a \"Not found\" section, and surfaces referenced-but-unscraped URLs in a \"Follow-up signals\" bulletin that feeds the next research loop. Good examples: \"root cause | affected versions | fix | workarounds | timeline\"; \"pricing tiers | rate limits | enterprise contact | free-tier quotas\"; \"maintainer decisions | accepted fix commits | stacktraces | resolved version\". Omit this argument to skip LLM extraction entirely and receive cleaned markdown for each URL (raw mode \u2014 cheaper, faster, and useful when you want the whole page rather than a filtered view).',\n ),\n}).strict();\n\nexport type ScrapeLinksParams = z.infer<typeof scrapeLinksParamsSchema>;\n\nexport const scrapeLinksOutputSchema = z.object({\n // `content` deliberately NOT duplicated here \u2014 the primary markdown lives in\n // the MCP tool result's `content[0].text`. Previously this schema echoed the\n // whole extraction output, doubling token cost for clients that forward both.\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of URLs processed.'),\n successful: z.number().int().nonnegative().describe('URLs fetched successfully.'),\n failed: z.number().int().nonnegative().describe('URLs that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n total_credits: z.number().int().nonnegative().describe('External scraping credits consumed.'),\n }).strict(),\n}).strict();\n\nexport type ScrapeLinksOutput = z.infer<typeof scrapeLinksOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAElB,MAAM,YAAY,EACf,OAAO,EACP,IAAI,EAAE,SAAS,mCAAmC,CAAC,EACnD;AAAA,EACC,SAAO,IAAI,WAAW,SAAS,KAAK,IAAI,WAAW,UAAU;AAAA,EAC7D,EAAE,SAAS,iDAAiD;AAC9D,EACC,SAAS,gDAAgD;AAErD,MAAM,0BAA0B,EAAE,OAAO;AAAA,EAC9C,MAAM,EACH,MAAM,SAAS,EACf,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,icAA4b;AAAA,EACxc,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D,SAAS,EACT;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,0BAA0B,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA,EAI9C,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,2BAA2B;AAAA,IAChF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,4BAA4B;AAAA,IAChF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,mBAAmB;AAAA,IACnE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,qCAAqC;AAAA,EAC9F,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -46,7 +46,18 @@ const webSearchOutputSchema = z.object({
|
|
|
46
46
|
result_count: z.number().int().nonnegative().describe("Results returned for this query."),
|
|
47
47
|
top_url: z.string().optional().describe("Domain of the top result.")
|
|
48
48
|
})).optional().describe("Per-query result counts and top URLs."),
|
|
49
|
-
low_yield_queries: z.array(z.string()).optional().describe("Queries that produced 0-1 results.")
|
|
49
|
+
low_yield_queries: z.array(z.string()).optional().describe("Queries that produced 0-1 results."),
|
|
50
|
+
query_rewrites: z.array(z.object({
|
|
51
|
+
original: z.string().describe("The query as the agent submitted it."),
|
|
52
|
+
rewritten: z.string().describe("The query as dispatched to Google after Phase A normalization."),
|
|
53
|
+
rules: z.array(z.string()).describe("Rule ids applied (A1=operator-char de-quote, A2=path/URL de-quote, A3=phrase-AND collapse).")
|
|
54
|
+
})).optional().describe("Pre-dispatch query rewrites \u2014 Phase A normalizations (operator-char and path/URL de-quote, phrase-AND \u2192 anchor + OR collapse)."),
|
|
55
|
+
retried_queries: z.array(z.object({
|
|
56
|
+
original: z.string().describe("The query as dispatched (post-Phase-A) that returned 0 results."),
|
|
57
|
+
retried_with: z.string().describe("The relaxed form retried after the empty initial response."),
|
|
58
|
+
rules: z.array(z.string()).describe("Rule ids applied (B1=strip all quotes, B2=drop site: filter)."),
|
|
59
|
+
recovered_results: z.number().int().nonnegative().describe("How many hits the retry produced; 0 means the retry also failed.")
|
|
60
|
+
})).optional().describe("On-empty retries \u2014 Phase B relaxations applied after the initial Serper batch returned 0 results for a query.")
|
|
50
61
|
}).strict()
|
|
51
62
|
}).strict();
|
|
52
63
|
export {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/schemas/web-search.ts"],
|
|
4
|
-
"sourcesContent": ["import { z } from 'zod';\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .describe('A single Google search query. Each query runs as a separate parallel search. Use operators (site:, quotes, verbatim version numbers) to sharpen retrieval.'),\n )\n .min(1, { message: 'web-search: At least 1 query required' })\n .describe(\n 'Search queries to run in parallel via Google. Think of these as **concept groups** \u2014 clusters of semantically distinct facets of your research goal, each probing a DIFFERENT angle (official spec, implementation, failures, comparison, sentiment, changelog, CVE, pricing). Fire all groups in ONE call as a flat array. Overlapping queries waste budget; orthogonal facets multiply coverage. A narrow bug needs 10\u201320 queries across 2\u20133 facets; a comparison needs 25\u201335 across 4\u20136 facets; open-ended synthesis needs 40\u201380 across 8+ facets.',\n ),\n extract: z\n .string()\n .min(1, { message: 'web-search: extract cannot be empty' })\n .describe(\n 'Semantic instruction for the relevance classifier \u2014 what \"relevant\" means for THIS goal. Drives tiering (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER), synthesis, gap analysis, and refine-query suggestions. Be specific: \"OAuth 2.1 support in TypeScript MCP frameworks \u2014 runnable code, not marketing\", not \"MCP OAuth\". The classifier uses this to choose a source-of-truth rubric (vendor_doc for spec, github for bugs, reddit/blog for migration/sentiment, cve_databases for security).',\n ),\n raw: z\n .boolean()\n .default(false)\n .describe('Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.'),\n scope: z\n .enum(['web', 'reddit', 'both'])\n .default('web')\n .describe(\n 'Search scope. \"web\" (default) = open web, no augmentation. \"reddit\" = server appends `site:reddit.com` to every query and filters results to post permalinks (`/r/.+/comments/[a-z0-9]+/`); subreddit homepages are dropped. \"both\" = runs every query twice (open web + reddit-scoped), merges the result set, and tags each row with its source. Use \"reddit\" for sentiment/migration/lived-experience research; use \"both\" when you want one call to cover both branches.',\n ),\n verbose: z\n .boolean()\n .default(false)\n .describe(\n 'Include the per-row scoring/coverage metadata, the trailing Signals block, and the CONSENSUS labels even when they carry little signal (single-query hits, threshold of 1). Default false \u2014 most agents do not need this and it costs ~1.5KB per call on a typical 3-query fan-out.',\n ),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n // `content` deliberately NOT duplicated here \u2014 the primary markdown lives in\n // the MCP tool result's `content[0].text`. Previously this schema echoed the\n // whole markdown under `structuredContent.content`, doubling token cost for\n // clients that forward both fields to an LLM.\n results: z\n .array(z.object({\n rank: z.number().int().positive().describe('1-based rank in the merged ranking.'),\n url: z.string().describe('Result URL.'),\n title: z.string().describe('Page title from the result.'),\n snippet: z.string().describe('Search snippet from the result.'),\n source_type: z\n .enum(['reddit', 'github', 'docs', 'blog', 'paper', 'qa', 'cve', 'news', 'video', 'web'])\n .describe(\n 'Heuristic source kind from the URL. When the LLM classifier is online its tag overrides this.',\n ),\n score: z.number().describe('Composite CTR-weighted score, normalized to 100.'),\n seen_in: z.number().int().nonnegative().describe('Number of input queries this URL appeared in.'),\n best_position: z.number().int().nonnegative().describe('Best (lowest) SERP position observed.'),\n }))\n .optional()\n .describe('Per-result structured payload \u2014 same data the markdown table renders, machine-readable.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n llm_classified: z.boolean().describe('Whether LLM classification was applied.'),\n llm_error: z.string().optional().describe('LLM error if classification failed and fell back to raw.'),\n scope: z.enum(['web', 'reddit', 'both']).optional().describe('Search scope used.'),\n coverage_summary: z\n .array(z.object({\n query: z.string().describe('The search query.'),\n result_count: z.number().int().nonnegative().describe('Results returned for this query.'),\n top_url: z.string().optional().describe('Domain of the top result.'),\n }))\n .optional()\n .describe('Per-query result counts and top URLs.'),\n low_yield_queries: z\n .array(z.string())\n .optional()\n .describe('Queries that produced 0-1 results.'),\n }).strict(),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
|
|
5
|
-
"mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD,SAAS,4JAA4J;AAAA,EAC1K,EACC,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,sCAAsC,CAAC,EACzD;AAAA,IACC;AAAA,EACF;AAAA,EACF,KAAK,EACF,QAAQ,EACR,QAAQ,KAAK,EACb,SAAS,oGAAoG;AAAA,EAChH,OAAO,EACJ,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAC9B,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA,EAK5C,SAAS,EACN,MAAM,EAAE,OAAO;AAAA,IACd,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,qCAAqC;AAAA,IAChF,KAAK,EAAE,OAAO,EAAE,SAAS,aAAa;AAAA,IACtC,OAAO,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IACxD,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,IAC9D,aAAa,EACV,KAAK,CAAC,UAAU,UAAU,QAAQ,QAAQ,SAAS,MAAM,OAAO,QAAQ,SAAS,KAAK,CAAC,EACvF;AAAA,MACC;AAAA,IACF;AAAA,IACF,OAAO,EAAE,OAAO,EAAE,SAAS,kDAAkD;AAAA,IAC7E,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,+CAA+C;AAAA,IAChG,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,uCAAuC;AAAA,EAChG,CAAC,CAAC,EACD,SAAS,EACT,SAAS,8FAAyF;AAAA,EACrG,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,gBAAgB,EAAE,QAAQ,EAAE,SAAS,yCAAyC;AAAA,IAC9E,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0DAA0D;AAAA,IACpG,OAAO,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS,EAAE,SAAS,oBAAoB;AAAA,IACjF,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,OAAO,EAAE,OAAO,EAAE,SAAS,mBAAmB;AAAA,MAC9C,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,MACxF,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,2BAA2B;AAAA,IACrE,CAAC,CAAC,EACD,SAAS,EACT,SAAS,uCAAuC;AAAA,IACnD,mBAAmB,EAChB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,oCAAoC;AAAA,
|
|
4
|
+
"sourcesContent": ["import { z } from 'zod';\n\nexport const webSearchParamsSchema = z.object({\n queries: z\n .array(\n z.string()\n .min(1, { message: 'web-search: Query cannot be empty' })\n .describe('A single Google search query. Each query runs as a separate parallel search. Use operators (site:, quotes, verbatim version numbers) to sharpen retrieval.'),\n )\n .min(1, { message: 'web-search: At least 1 query required' })\n .describe(\n 'Search queries to run in parallel via Google. Think of these as **concept groups** \u2014 clusters of semantically distinct facets of your research goal, each probing a DIFFERENT angle (official spec, implementation, failures, comparison, sentiment, changelog, CVE, pricing). Fire all groups in ONE call as a flat array. Overlapping queries waste budget; orthogonal facets multiply coverage. A narrow bug needs 10\u201320 queries across 2\u20133 facets; a comparison needs 25\u201335 across 4\u20136 facets; open-ended synthesis needs 40\u201380 across 8+ facets.',\n ),\n extract: z\n .string()\n .min(1, { message: 'web-search: extract cannot be empty' })\n .describe(\n 'Semantic instruction for the relevance classifier \u2014 what \"relevant\" means for THIS goal. Drives tiering (HIGHLY_RELEVANT / MAYBE_RELEVANT / OTHER), synthesis, gap analysis, and refine-query suggestions. Be specific: \"OAuth 2.1 support in TypeScript MCP frameworks \u2014 runnable code, not marketing\", not \"MCP OAuth\". The classifier uses this to choose a source-of-truth rubric (vendor_doc for spec, github for bugs, reddit/blog for migration/sentiment, cve_databases for security).',\n ),\n raw: z\n .boolean()\n .default(false)\n .describe('Skip LLM classification and return the raw ranked URL list. Use when you need unprocessed results.'),\n scope: z\n .enum(['web', 'reddit', 'both'])\n .default('web')\n .describe(\n 'Search scope. \"web\" (default) = open web, no augmentation. \"reddit\" = server appends `site:reddit.com` to every query and filters results to post permalinks (`/r/.+/comments/[a-z0-9]+/`); subreddit homepages are dropped. \"both\" = runs every query twice (open web + reddit-scoped), merges the result set, and tags each row with its source. Use \"reddit\" for sentiment/migration/lived-experience research; use \"both\" when you want one call to cover both branches.',\n ),\n verbose: z\n .boolean()\n .default(false)\n .describe(\n 'Include the per-row scoring/coverage metadata, the trailing Signals block, and the CONSENSUS labels even when they carry little signal (single-query hits, threshold of 1). Default false \u2014 most agents do not need this and it costs ~1.5KB per call on a typical 3-query fan-out.',\n ),\n}).strict();\n\nexport type WebSearchParams = z.infer<typeof webSearchParamsSchema>;\n\nexport const webSearchOutputSchema = z.object({\n // `content` deliberately NOT duplicated here \u2014 the primary markdown lives in\n // the MCP tool result's `content[0].text`. Previously this schema echoed the\n // whole markdown under `structuredContent.content`, doubling token cost for\n // clients that forward both fields to an LLM.\n results: z\n .array(z.object({\n rank: z.number().int().positive().describe('1-based rank in the merged ranking.'),\n url: z.string().describe('Result URL.'),\n title: z.string().describe('Page title from the result.'),\n snippet: z.string().describe('Search snippet from the result.'),\n source_type: z\n .enum(['reddit', 'github', 'docs', 'blog', 'paper', 'qa', 'cve', 'news', 'video', 'web'])\n .describe(\n 'Heuristic source kind from the URL. When the LLM classifier is online its tag overrides this.',\n ),\n score: z.number().describe('Composite CTR-weighted score, normalized to 100.'),\n seen_in: z.number().int().nonnegative().describe('Number of input queries this URL appeared in.'),\n best_position: z.number().int().nonnegative().describe('Best (lowest) SERP position observed.'),\n }))\n .optional()\n .describe('Per-result structured payload \u2014 same data the markdown table renders, machine-readable.'),\n metadata: z.object({\n total_items: z.number().int().nonnegative().describe('Number of queries executed.'),\n successful: z.number().int().nonnegative().describe('Queries that returned results.'),\n failed: z.number().int().nonnegative().describe('Queries that failed.'),\n execution_time_ms: z.number().int().nonnegative().describe('Wall clock time in milliseconds.'),\n llm_classified: z.boolean().describe('Whether LLM classification was applied.'),\n llm_error: z.string().optional().describe('LLM error if classification failed and fell back to raw.'),\n scope: z.enum(['web', 'reddit', 'both']).optional().describe('Search scope used.'),\n coverage_summary: z\n .array(z.object({\n query: z.string().describe('The search query.'),\n result_count: z.number().int().nonnegative().describe('Results returned for this query.'),\n top_url: z.string().optional().describe('Domain of the top result.'),\n }))\n .optional()\n .describe('Per-query result counts and top URLs.'),\n low_yield_queries: z\n .array(z.string())\n .optional()\n .describe('Queries that produced 0-1 results.'),\n query_rewrites: z\n .array(z.object({\n original: z.string().describe('The query as the agent submitted it.'),\n rewritten: z.string().describe('The query as dispatched to Google after Phase A normalization.'),\n rules: z.array(z.string()).describe('Rule ids applied (A1=operator-char de-quote, A2=path/URL de-quote, A3=phrase-AND collapse).'),\n }))\n .optional()\n .describe('Pre-dispatch query rewrites \u2014 Phase A normalizations (operator-char and path/URL de-quote, phrase-AND \u2192 anchor + OR collapse).'),\n retried_queries: z\n .array(z.object({\n original: z.string().describe('The query as dispatched (post-Phase-A) that returned 0 results.'),\n retried_with: z.string().describe('The relaxed form retried after the empty initial response.'),\n rules: z.array(z.string()).describe('Rule ids applied (B1=strip all quotes, B2=drop site: filter).'),\n recovered_results: z.number().int().nonnegative().describe('How many hits the retry produced; 0 means the retry also failed.'),\n }))\n .optional()\n .describe('On-empty retries \u2014 Phase B relaxations applied after the initial Serper batch returned 0 results for a query.'),\n }).strict(),\n}).strict();\n\nexport type WebSearchOutput = z.infer<typeof webSearchOutputSchema>;\n"],
|
|
5
|
+
"mappings": "AAAA,SAAS,SAAS;AAEX,MAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EACN;AAAA,IACC,EAAE,OAAO,EACN,IAAI,GAAG,EAAE,SAAS,oCAAoC,CAAC,EACvD,SAAS,4JAA4J;AAAA,EAC1K,EACC,IAAI,GAAG,EAAE,SAAS,wCAAwC,CAAC,EAC3D;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,OAAO,EACP,IAAI,GAAG,EAAE,SAAS,sCAAsC,CAAC,EACzD;AAAA,IACC;AAAA,EACF;AAAA,EACF,KAAK,EACF,QAAQ,EACR,QAAQ,KAAK,EACb,SAAS,oGAAoG;AAAA,EAChH,OAAO,EACJ,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAC9B,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AAAA,EACF,SAAS,EACN,QAAQ,EACR,QAAQ,KAAK,EACb;AAAA,IACC;AAAA,EACF;AACJ,CAAC,EAAE,OAAO;AAIH,MAAM,wBAAwB,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA,EAK5C,SAAS,EACN,MAAM,EAAE,OAAO;AAAA,IACd,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS,qCAAqC;AAAA,IAChF,KAAK,EAAE,OAAO,EAAE,SAAS,aAAa;AAAA,IACtC,OAAO,EAAE,OAAO,EAAE,SAAS,6BAA6B;AAAA,IACxD,SAAS,EAAE,OAAO,EAAE,SAAS,iCAAiC;AAAA,IAC9D,aAAa,EACV,KAAK,CAAC,UAAU,UAAU,QAAQ,QAAQ,SAAS,MAAM,OAAO,QAAQ,SAAS,KAAK,CAAC,EACvF;AAAA,MACC;AAAA,IACF;AAAA,IACF,OAAO,EAAE,OAAO,EAAE,SAAS,kDAAkD;AAAA,IAC7E,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,+CAA+C;AAAA,IAChG,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,uCAAuC;AAAA,EAChG,CAAC,CAAC,EACD,SAAS,EACT,SAAS,8FAAyF;AAAA,EACrG,UAAU,EAAE,OAAO;AAAA,IACjB,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,6BAA6B;AAAA,IAClF,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,gCAAgC;AAAA,IACpF,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,sBAAsB;AAAA,IACtE,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,IAC7F,gBAAgB,EAAE,QAAQ,EAAE,SAAS,yCAAyC;AAAA,IAC9E,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0DAA0D;AAAA,IACpG,OAAO,EAAE,KAAK,CAAC,OAAO,UAAU,MAAM,CAAC,EAAE,SAAS,EAAE,SAAS,oBAAoB;AAAA,IACjF,kBAAkB,EACf,MAAM,EAAE,OAAO;AAAA,MACd,OAAO,EAAE,OAAO,EAAE,SAAS,mBAAmB;AAAA,MAC9C,cAAc,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kCAAkC;AAAA,MACxF,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,2BAA2B;AAAA,IACrE,CAAC,CAAC,EACD,SAAS,EACT,SAAS,uCAAuC;AAAA,IACnD,mBAAmB,EAChB,MAAM,EAAE,OAAO,CAAC,EAChB,SAAS,EACT,SAAS,oCAAoC;AAAA,IAChD,gBAAgB,EACb,MAAM,EAAE,OAAO;AAAA,MACd,UAAU,EAAE,OAAO,EAAE,SAAS,sCAAsC;AAAA,MACpE,WAAW,EAAE,OAAO,EAAE,SAAS,gEAAgE;AAAA,MAC/F,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,6FAA6F;AAAA,IACnI,CAAC,CAAC,EACD,SAAS,EACT,SAAS,0IAAgI;AAAA,IAC5I,iBAAiB,EACd,MAAM,EAAE,OAAO;AAAA,MACd,UAAU,EAAE,OAAO,EAAE,SAAS,iEAAiE;AAAA,MAC/F,cAAc,EAAE,OAAO,EAAE,SAAS,4DAA4D;AAAA,MAC9F,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS,+DAA+D;AAAA,MACnG,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,kEAAkE;AAAA,IAC/H,CAAC,CAAC,EACD,SAAS,EACT,SAAS,oHAA+G;AAAA,EAC7H,CAAC,EAAE,OAAO;AACZ,CAAC,EAAE,OAAO;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
|
@@ -291,13 +291,9 @@ RULES:
|
|
|
291
291
|
- Preserve code blocks, command examples, tables exactly.
|
|
292
292
|
- Do NOT add commentary or recommendations outside "Follow-up signals".
|
|
293
293
|
- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.
|
|
294
|
-
-
|
|
295
|
-
\`## Matches\\n_Page did not load:
|
|
296
|
-
|
|
297
|
-
\`## Matches\\n_Page did not load: paywall_\`
|
|
298
|
-
\`## Matches\\n_Page did not load: JS-render-empty_\`
|
|
299
|
-
\`## Matches\\n_Page did not load: non-text-asset_\`
|
|
300
|
-
\`## Matches\\n_Page did not load: truncated-before-relevant-section_\`
|
|
294
|
+
- Page appears gated (login wall, paywall, JS-render-empty shell) or near-empty: BEFORE dismissing the page, look for ANY visible text \u2014 og:title, og:description, meta description, headline, author name, nav labels, teaser/preview sentences, visible comment snippets. If ANY such text exists, extract it as usual under \`## Source\` + \`## Matches\`, and list the blocked facets under \`## Not found\`. Prefix the first \`## Matches\` bullet with \`**[partial \u2014 <reason>]**\` so the caller knows the body is gated (reasons: \`login-wall | paywall | JS-render-empty | truncated-before-relevant-section\`). ONLY when there is NO visible extractable text at all (< 50 words AND no og:* AND no headline AND no preview), return exactly one line:
|
|
295
|
+
\`## Matches\\n_Page did not load: <reason>_\`
|
|
296
|
+
Valid reasons: \`404 | login-wall | paywall | JS-render-empty | non-text-asset | truncated-before-relevant-section\`.
|
|
301
297
|
|
|
302
298
|
Content:
|
|
303
299
|
${truncatedContent}` : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \`[text](url)\`. Do NOT summarize \u2014 preserve the full body.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../../src/services/llm-processor.ts"],
|
|
4
|
-
"sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses any OpenAI-compatible endpoint. Reasoning effort is always 'low'.\n * Primary model exhausts its retries first; fallback model (LLM_FALLBACK_MODEL) then\n * gets up to FALLBACK_RETRY_COUNT additional attempts before the call fails.\n * NEVER throws \u2014 always returns a valid result.\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~125k tokens, sized for the larger fallback model) */\nconst MAX_LLM_INPUT_CHARS = 500_000 as const;\n\n/**\n * Maximum input characters for the primary model when it has a smaller context window.\n * Used when an input would exceed the mini model's limits so the call goes straight to fallback\n * instead of burning retries on guaranteed context_length_exceeded errors.\n */\nconst MAX_PRIMARY_MODEL_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 600_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 75_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 150_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. */\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. */\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n/** Number of additional attempts using the fallback model after primary exhausts. */\nconst FALLBACK_RETRY_COUNT = 3 as const;\n\n// OpenAI-compatible retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n return {\n model,\n messages: [{ role: 'user', content: prompt }],\n reasoning_effort: 'low',\n };\n}\n\nexport async function requestText(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n modelOverride?: string,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = modelOverride || LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Single LLM call with automatic fallback model.\n * Tries the primary model once; if it fails and LLM_FALLBACK_MODEL is set,\n * retries up to FALLBACK_RETRY_COUNT times on the fallback model.\n * Used for single-shot calls (classify, brief, refine queries).\n */\nexport async function requestTextWithFallback(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const primary = await requestText(processor, prompt, operationLabel, signal);\n if (primary.content) return primary;\n\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (!fallbackModel) return primary;\n\n mcpLog('warning', `Primary model failed, switching to fallback ${fallbackModel}`, 'llm');\n\n let lastError = primary.error;\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n const result = await requestText(processor, prompt, `${operationLabel} [fallback]`, signal, fallbackModel);\n if (result.content) return result;\n lastError = result.error;\n }\n\n return { content: null, model: fallbackModel, error: lastError };\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if (error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from the OpenAI-compatible endpoint\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Detect \"the prompt is too long for this model\" errors.\n * These are NOT retryable on the same model \u2014 we should skip remaining primary retries\n * and go straight to the fallback model (which has a larger context window).\n */\nfunction isContextWindowError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n const record = error as Record<string, unknown>;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nestedCode = nested && typeof nested.code === 'string' ? nested.code : undefined;\n if (code === 'context_length_exceeded' || nestedCode === 'context_length_exceeded') {\n return true;\n }\n\n const messages: string[] = [];\n if (typeof record.message === 'string') messages.push(record.message);\n if (nested && typeof nested.message === 'string') messages.push(nested.message);\n const combined = messages.join(' ').toLowerCase();\n return (\n combined.includes('context length') ||\n combined.includes('context window') ||\n combined.includes('maximum context') ||\n combined.includes('maximum tokens') ||\n combined.includes('token limit') ||\n combined.includes('too many tokens') ||\n combined.includes('prompt is too long') ||\n combined.includes('reduce the length')\n );\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL must all be set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to avoid blowing past even the fallback model's context.\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // If the prompt would exceed the primary (mini) model's smaller context window,\n // skip it entirely and go straight to the fallback model. Saves burning retries\n // on guaranteed context_length_exceeded errors.\n const skipPrimaryForSize =\n truncatedContent.length > MAX_PRIMARY_MODEL_INPUT_CHARS && !!LLM_EXTRACTION.FALLBACK_MODEL;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) \"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Content clearly failed to load: return ONLY a single line, choosing from:\n \\`## Matches\\\\n_Page did not load: 404_\\`\n \\`## Matches\\\\n_Page did not load: login-wall_\\`\n \\`## Matches\\\\n_Page did not load: paywall_\\`\n \\`## Matches\\\\n_Page did not load: JS-render-empty_\\`\n \\`## Matches\\\\n_Page did not load: non-text-asset_\\`\n \\`## Matches\\\\n_Page did not load: truncated-before-relevant-section_\\`\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Phase 1: primary model with up to LLM_RETRY_CONFIG.maxRetries retries.\n // Skip entirely when the input is too big for the primary's context window.\n if (skipPrimaryForSize) {\n mcpLog(\n 'info',\n `Input ${truncatedContent.length} chars exceeds primary model cap (${MAX_PRIMARY_MODEL_INPUT_CHARS}); routing directly to fallback`,\n 'llm',\n );\n } else {\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(processor, prompt, 'LLM extraction', signal);\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response \u2014 not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? String((err as Record<string, unknown>).code)\n : undefined;\n const ctxErr = isContextWindowError(err);\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}, context_window=${ctxErr}]`, 'llm');\n\n // Context window errors are not retryable on the same model \u2014 jump to fallback.\n if (ctxErr) {\n mcpLog('warning', 'Context window exceeded on primary \u2014 skipping remaining retries, routing to fallback', 'llm');\n break;\n }\n\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n break;\n }\n }\n }\n\n // Phase 2: fallback model \u2014 FALLBACK_RETRY_COUNT attempts before giving up\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (fallbackModel) {\n mcpLog('warning', `Primary exhausted, switching to fallback ${fallbackModel}`, 'llm');\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n try {\n const response = await requestText(processor, prompt, 'LLM extraction [fallback]', signal, fallbackModel);\n if (response.content) {\n mcpLog('info', `Fallback extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n mcpLog('warning', 'Fallback returned empty response', 'llm');\n break;\n } catch (err: unknown) {\n lastError = classifyError(err);\n mcpLog('error', `Fallback error (attempt ${attempt + 1}): ${lastError.message}`, 'llm');\n }\n }\n }\n\n const errorMessage = lastError?.message || 'Unknown LLM error';\n mcpLog('error', `All attempts failed: ${errorMessage}. Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content,\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly string[];\n}\n\nconst VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n- For \\`site:<domain>\\` filters, ONLY use domains you are highly confident are real. Safe choices: \\`github.com\\`, \\`stackoverflow.com\\`, \\`reddit.com\\`, \\`news.ycombinator.com\\`, \\`arxiv.org\\`, \\`nvd.nist.gov\\`, \\`pypi.org\\`, \\`npmjs.com\\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names \"Cursor\" \u2192 \\`cursor.com\\`/\\`docs.cursor.com\\` is acceptable). If you don't know the product's real docs domain, leave the query open (no \\`site:\\`) instead of guessing.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. \\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
|
|
5
|
-
"mappings": "AAQA,OAAO,YAAY;AACnB,SAAS,gBAAgB,uBAAuB;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,cAAc;AAGvB,MAAM,sBAAsB;AAO5B,MAAM,gCAAgC;AAGtC,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB;AAG9B,MAAM,uBAAuB;AAG7B,MAAM,0BAA0B;AAuBhC,MAAM,YAAY;AAAA,EAChB,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,sBAAsB;AAAA,EACtB,wBAAwB;AAAA,EACxB,kBAAkB;AAAA,EAClB,oBAAoB;AAAA,EACpB,4BAA4B;AAAA,EAC5B,8BAA8B;AAChC;AAEO,SAAS,eAAe,MAA2B;AACxD,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,6BAA6B;AAAA,EACzC,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,+BAA+B;AAAA,EAC3C;AACF;AAEO,SAAS,eAAe,MAAqB,KAAoB;AACtE,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,OAAO,eAAe;AAClF,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,8BAA8B;AAAA,EAC1C,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,gCAAgC;AAAA,EAC5C;AACF;AAEO,SAAS,eAAkC;AAChD,QAAM,MAAM,gBAAgB;AAC5B,SAAO;AAAA,IACL,eAAe,UAAU;AAAA,IACzB,iBAAiB,UAAU;AAAA,IAC3B,sBAAsB,UAAU;AAAA,IAChC,wBAAwB,UAAU;AAAA,IAClC,kBAAkB,UAAU;AAAA,IAC5B,oBAAoB,UAAU;AAAA;AAAA;AAAA,IAG9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,4BAA4B,UAAU;AAAA,IACtC,8BAA8B,UAAU;AAAA,EAC1C;AACF;AAGO,SAAS,0BAAgC;AAC9C,YAAU,gBAAgB;AAC1B,YAAU,kBAAkB;AAC5B,YAAU,uBAAuB;AACjC,YAAU,yBAAyB;AACnC,YAAU,mBAAmB;AAC7B,YAAU,qBAAqB;AAC/B,YAAU,6BAA6B;AACvC,YAAU,+BAA+B;AAC3C;AAgBA,MAAM,mBAAmB;AAAA,EACvB,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,YAAY;AACd;AAGA,MAAM,uBAAuB;AAG7B,MAAM,4BAA4B,oBAAI,IAAI;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGD,SAAS,UAAU,OAA6C;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,YAAY,SACZ,OAAQ,MAAkC,WAAW;AAEzD;AAEA,IAAI,YAA2B;AAIxB,SAAS,qBAAoC;AAClD,MAAI,CAAC,gBAAgB,EAAE,cAAe,QAAO;AAE7C,MAAI,CAAC,WAAW;AACd,gBAAY,IAAI,OAAO;AAAA,MACrB,SAAS,eAAe;AAAA,MACxB,QAAQ,eAAe;AAAA,MACvB,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,gBAAgB,EAAE,WAAW,yBAAyB;AAAA,IACxD,CAAC;AACD,WAAO,QAAQ,qCAAqC,eAAe,KAAK,cAAc,eAAe,QAAQ,KAAK,KAAK;AAAA,EACzH;AACA,SAAO;AACT;AAEA,SAAS,qBAAqB,OAAe,QAAyC;AACpF,SAAO;AAAA,IACL;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,IAC5C,kBAAkB;AAAA,EACpB;AACF;AAEA,eAAsB,YACpB,WACA,QACA,gBACA,QACA,eACoE;AACpE,QAAM,QAAQ,iBAAiB,eAAe;AAE9C,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB,CAAC,gBAAgB,UAAU,KAAK,YAAY;AAAA,QAC1C,qBAAqB,OAAO,MAAM;AAAA,QAClC;AAAA,UACE,QAAQ,SAAS,YAAY,IAAI,CAAC,aAAa,MAAM,CAAC,IAAI;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG,cAAc,KAAK,KAAK;AAAA,IAC7B;AAEA,UAAM,UAAU,SAAS,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK;AAC9D,QAAI,SAAS;AACX,aAAO,EAAE,SAAS,MAAM;AAAA,IAC1B;AAEA,UAAM,MAAM,6BAA6B,KAAK;AAC9C,WAAO,WAAW,GAAG,cAAc,qCAAqC,KAAK,IAAI,KAAK;AACtF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,IAAI;AAAA,EAC5C,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,GAAG,cAAc,qBAAqB,KAAK,KAAK,OAAO,IAAI,KAAK;AAClF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,QAAQ;AAAA,EAChD;AACF;AAQA,eAAsB,wBACpB,WACA,QACA,gBACA,QACoE;AACpE,QAAM,UAAU,MAAM,YAAY,WAAW,QAAQ,gBAAgB,MAAM;AAC3E,MAAI,QAAQ,QAAS,QAAO;AAE5B,QAAM,gBAAgB,eAAe;AACrC,MAAI,CAAC,cAAe,QAAO;AAE3B,SAAO,WAAW,+CAA+C,aAAa,IAAI,KAAK;AAEvF,MAAI,YAAY,QAAQ;AACxB,WAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,QAAI,UAAU,GAAG;AACf,YAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,aAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,UAAI;AAAE,cAAM,MAAM,SAAS,MAAM;AAAA,MAAG,QAAQ;AAAE;AAAA,MAAO;AAAA,IACvD;AACA,UAAM,SAAS,MAAM,YAAY,WAAW,QAAQ,GAAG,cAAc,eAAe,QAAQ,aAAa;AACzG,QAAI,OAAO,QAAS,QAAO;AAC3B,gBAAY,OAAO;AAAA,EACrB;AAEA,SAAO,EAAE,SAAS,MAAM,OAAO,eAAe,OAAO,UAAU;AACjE;AAKA,SAAS,oBAAoB,OAAyB;AACpD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAGhD,QAAM,YAAa,OAA6B;AAChD,MAAI,cAAc,cAAc,cAAc,aAAa;AACzD,WAAO;AAAA,EACT;AAGA,MAAI,UAAU,KAAK,GAAG;AACpB,QAAI,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,KAAK;AACxH,aAAO;AAAA,IACT;AAAA,EACF;AAGA,QAAM,SAAS;AACf,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AACN,QAAM,YACJ,SACC,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO,YAC1D,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,MAAI,aAAa,0BAA0B,IAAI,SAAS,GAAG;AACzD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,OAAO,OAAO,YAAY,WAAW,OAAO,QAAQ,YAAY,IAAI;AACpF,MACE,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,SAAS,KAC1B,QAAQ,SAAS,WAAW,KAC5B,QAAQ,SAAS,qBAAqB,KACtC,QAAQ,SAAS,cAAc,KAC/B,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,YAAY,GAC7B;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAOA,SAAS,qBAAqB,OAAyB;AACrD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAEhD,QAAM,SAAS;AACf,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AAEN,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,aAAa,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7E,MAAI,SAAS,6BAA6B,eAAe,2BAA2B;AAClF,WAAO;AAAA,EACT;AAEA,QAAM,WAAqB,CAAC;AAC5B,MAAI,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AACpE,MAAI,UAAU,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AAC9E,QAAM,WAAW,SAAS,KAAK,GAAG,EAAE,YAAY;AAChD,SACE,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,aAAa,KAC/B,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,oBAAoB,KACtC,SAAS,SAAS,mBAAmB;AAEzC;AAKA,SAAS,oBAAoB,SAAyB;AACpD,QAAM,mBAAmB,iBAAiB,cAAc,KAAK,IAAI,GAAG,OAAO;AAC3E,QAAM,SAAS,KAAK,OAAO,IAAI,wBAAwB;AACvD,SAAO,KAAK,IAAI,mBAAmB,QAAQ,iBAAiB,UAAU;AACxE;AAOA,eAAsB,sBACpB,SACA,QACA,WACA,QACoB;AAEpB,MAAI,CAAC,OAAO,SAAS;AACnB,WAAO,EAAE,SAAS,WAAW,MAAM;AAAA,EACrC;AAEA,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL;AAAA,MACA,WAAW;AAAA,MACX,OAAO;AAAA,MACP,cAAc;AAAA,QACZ,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO,EAAE,SAAS,WAAW,IAAI,WAAW,OAAO,OAAO,yBAAyB;AAAA,EACrF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,sBACtC,QAAQ,UAAU,GAAG,mBAAmB,IAAI,0CAC5C;AAKJ,QAAM,qBACJ,iBAAiB,SAAS,iCAAiC,CAAC,CAAC,eAAe;AAK9E,QAAM,WAAW,MAAM;AACrB,QAAI,CAAC,OAAO,IAAK,QAAO;AACxB,QAAI;AACF,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,aAAO,GAAG,EAAE,MAAM,GAAG,EAAE,QAAQ;AAAA,IACjC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF,GAAG;AACH,QAAM,UAAU,UAAU,aAAa,OAAO;AAAA;AAAA,IAAS;AAEvD,QAAM,SAAS,OAAO,UAClB;AAAA;AAAA,EAEJ,OAAO,2BAA2B,OAAO,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAsDhD,gBAAgB,KACZ;AAAA;AAAA,EAEJ,OAAO;AAAA,EACP,gBAAgB;AAEhB,MAAI;AAIJ,MAAI,oBAAoB;AACtB;AAAA,MACE;AAAA,MACA,SAAS,iBAAiB,MAAM,qCAAqC,6BAA6B;AAAA,MAClG;AAAA,IACF;AAAA,EACF,OAAO;AACL,aAAS,UAAU,GAAG,WAAW,iBAAiB,YAAY,WAAW;AACvE,UAAI;AACF,YAAI,YAAY,GAAG;AACjB,iBAAO,QAAQ,4BAA4B,eAAe,KAAK,IAAI,KAAK;AAAA,QAC1E,OAAO;AACL,iBAAO,WAAW,iBAAiB,OAAO,IAAI,iBAAiB,UAAU,IAAI,KAAK;AAAA,QACpF;AAEA,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,kBAAkB,MAAM;AAE9E,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,0BAA0B,SAAS,QAAQ,MAAM,eAAe,KAAK;AACpF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,QACtD;AAGA,eAAO,WAAW,oCAAoC,KAAK;AAC3D,uBAAe,aAAa,6BAA6B;AACzD,eAAO;AAAA,UACL;AAAA,UACA,WAAW;AAAA,UACX,OAAO;AAAA,UACP,cAAc;AAAA,YACZ,MAAM,UAAU;AAAA,YAChB,SAAS;AAAA,YACT,WAAW;AAAA,UACb;AAAA,QACF;AAAA,MAEF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,cAAM,SAAS,UAAU,GAAG,IAAI,IAAI,SAAS;AAC7C,cAAM,OAAO,OAAO,QAAQ,YAAY,QAAQ,QAAQ,UAAU,MAC9D,OAAQ,IAAgC,IAAI,IAC5C;AACJ,cAAM,SAAS,qBAAqB,GAAG;AACvC,eAAO,SAAS,kBAAkB,UAAU,CAAC,MAAM,UAAU,OAAO,YAAY,MAAM,UAAU,IAAI,eAAe,oBAAoB,GAAG,CAAC,oBAAoB,MAAM,KAAK,KAAK;AAG/K,YAAI,QAAQ;AACV,iBAAO,WAAW,6FAAwF,KAAK;AAC/G;AAAA,QACF;AAEA,YAAI,oBAAoB,GAAG,KAAK,UAAU,iBAAiB,YAAY;AACrE,gBAAM,UAAU,oBAAoB,OAAO;AAC3C,iBAAO,WAAW,eAAe,OAAO,SAAS,KAAK;AACtD,cAAI;AAAE,kBAAM,MAAM,SAAS,MAAM;AAAA,UAAG,QAAQ;AAAE;AAAA,UAAO;AACrD;AAAA,QACF;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,gBAAgB,eAAe;AACrC,MAAI,eAAe;AACjB,WAAO,WAAW,4CAA4C,aAAa,IAAI,KAAK;AACpF,aAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,UAAI,UAAU,GAAG;AACf,cAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,eAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,YAAI;AAAE,gBAAM,MAAM,SAAS,MAAM;AAAA,QAAG,QAAQ;AAAE;AAAA,QAAO;AAAA,MACvD;AACA,UAAI;AACF,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,6BAA6B,QAAQ,aAAa;AACxG,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,sBAAsB,SAAS,QAAQ,MAAM,eAAe,KAAK;AAChF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,QACtD;AACA,eAAO,WAAW,oCAAoC,KAAK;AAC3D;AAAA,MACF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,eAAO,SAAS,2BAA2B,UAAU,CAAC,MAAM,UAAU,OAAO,IAAI,KAAK;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,eAAe,WAAW,WAAW;AAC3C,SAAO,SAAS,wBAAwB,YAAY,iCAAiC,KAAK;AAC1F,iBAAe,aAAa,YAAY;AAExC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,IACX,OAAO,0BAA0B,YAAY;AAAA,IAC7C,cAAc,aAAa;AAAA,MACzB,MAAM,UAAU;AAAA,MAChB,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAOA,MAAM,0BAA0B;AA2ChC,eAAsB,sBACpB,YAQA,WACA,cACA,WACA,kBAAqC,CAAC,GAC4B;AAClE,QAAM,iBAAiB,WAAW,MAAM,GAAG,uBAAuB;AAMlE,QAAM,iBAAiB,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AACxD,QAAM,gBAAgB,CAAC,SAAyB,eAAe,OAAO,CAAC,KAAK;AAG5E,QAAM,QAAkB,CAAC;AACzB,aAAW,OAAO,gBAAgB;AAChC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,UAAM,UAAU,IAAI,QAAQ,SAAS,MACjC,IAAI,QAAQ,MAAM,GAAG,GAAG,IAAI,QAC5B,IAAI;AACR,UAAM,KAAK,IAAI,IAAI,IAAI,OAAO,cAAc,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,WAAM,MAAM,WAAM,OAAO,EAAE;AAAA,EAC/F;AAEA,QAAM,mBAAmB,gBAAgB,SAAS,IAC9C,gBAAgB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC9C;AACJ,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA,aAEJ,SAAS;AAAA,SACb,KAAK;AAAA;AAAA;AAAA,EAGZ,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iBAmDD,eAAe,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAMpB,eAAe,MAAM,cAAc,YAAY;AAAA,EAC/D,MAAM,KAAK,IAAI,CAAC;AAEhB,MAAI;AACF,WAAO,QAAQ,eAAe,eAAe,MAAM,2BAA2B,KAAK;AAEnF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAGA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAKjC,QAAI,CAAC,OAAO,SAAS,OAAO,OAAO,cAAc,YAAY,CAAC,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC3F,YAAM,SAAS;AACf,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAEA,WAAO,QAAQ,4BAA4B,OAAO,QAAQ,OAAO,OAAK,EAAE,SAAS,iBAAiB,EAAE,MAAM,oBAAoB,KAAK;AACnI,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,OAAO;AAAA,EAC1B,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,0BAA0B,OAAO,IAAI,KAAK;AAC1D,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,MAAM,OAAO,0BAA0B,OAAO,GAAG;AAAA,EACpE;AACF;AAEA,eAAsB,+BACpB,YAKA,WACA,iBACA,WAC8D;AAC9D,QAAM,kBAAkB,WAAW,MAAM,GAAG,EAAE;AAC9C,QAAM,QAAQ,gBAAgB,IAAI,CAAC,QAAQ;AACzC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,WAAO,IAAI,IAAI,IAAI,KAAK,IAAI,KAAK,WAAM,MAAM;AAAA,EAC/C,CAAC;AAED,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aASJ,SAAS;AAAA;AAAA;AAAA,EAGpB,gBAAgB,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA,EAGvD,MAAM,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAShB,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,OAAO;AAAA,IACrC;AAEA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,MAAM,QAAQ,OAAO,cAAc,IAAI,OAAO,iBAAiB,CAAC,EAAE;AAAA,EACrF,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,4CAA4C,OAAO,IAAI,KAAK;AAC5E,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,QAAQ;AAAA,EACtC;AACF;AA0BA,MAAM,qBAAqB,oBAAI,IAAI;AAAA,EACjC;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAa;AAAA,EAAa;AAAA,EAAW;AAAA,EACpD;AAAA,EAAa;AAAA,EAAkB;AACjC,CAAC;AAED,MAAM,kBAAkB,oBAAI,IAAI,CAAC,QAAQ,SAAS,UAAU,OAAO,CAAC;AACpE,MAAM,iBAAiB,oBAAI,IAAmB,CAAC,UAAU,OAAO,MAAM,CAAC;AACvE,MAAM,mBAAmB,oBAAI,IAAI,CAAC,cAAc,cAAc,CAAC;AAE/D,SAAS,cAAc,OAAmC;AACxD,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM,OAAO,MAAM,QAAQ;AACzE;AAEA,SAAS,YAAY,OAA8C;AACjE,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM;AAChD,QAAI,OAAO,MAAM,YAAY,MAAM,KAAM,QAAO;AAChD,UAAM,OAAQ,EAA8B;AAC5C,UAAM,SAAU,EAA8B;AAC9C,WAAO,OAAO,SAAS,YAClB,iBAAiB,IAAI,IAAI,KACzB,OAAO,WAAW,YAClB,OAAO,KAAK,EAAE,SAAS;AAAA,EAC9B,CAAC;AACH;AAEO,SAAS,mBAAmB,KAAmC;AACpE,MAAI;AACF,UAAM,UAAU,IAAI,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACxF,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,UAAM,aAAa,OAAO,OAAO,eAAe,WAAW,OAAO,aAAa;AAC/E,QAAI,CAAC,cAAc,CAAC,mBAAmB,IAAI,UAAU,EAAG,QAAO;AAE/D,UAAM,mBAAmB,OAAO,OAAO,qBAAqB,WAAW,OAAO,mBAAmB;AACjG,QAAI,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,gBAAgB,EAAG,QAAO;AAExE,UAAM,iBAAiB,OAAO;AAC9B,QAAI,OAAO,mBAAmB,YAAY,CAAC,eAAe,IAAI,cAA+B,EAAG,QAAO;AAEvG,QAAI,CAAC,YAAY,OAAO,mBAAmB,KAAK,OAAO,oBAAoB,WAAW,EAAG,QAAO;AAChG,QAAI,CAAC,cAAc,OAAO,aAAa,KAAK,OAAO,cAAc,WAAW,EAAG,QAAO;AAEtF,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,OAAO,OAAO,sBAAsB,WAAW,OAAO,oBAAoB;AAAA,MAC7F;AAAA,MACA,uBAAuB,OAAO,OAAO,0BAA0B,WAAW,OAAO,wBAAwB;AAAA,MACzG;AAAA,MACA,qBAAqB,OAAO;AAAA,MAC5B,eAAe,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAAA,MACrE,iBAAiB,cAAc,OAAO,eAAe,IAAI,OAAO,kBAAkB,CAAC;AAAA,MACnF,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,MAC7E,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,IAC/E;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,sBACpB,MACA,WACA,QAC+B;AAC/B,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAOT,IAAI;AAAA,SACH,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA2CZ,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO,WAAW,kDAAkD,SAAS,SAAS,SAAS,IAAI,KAAK;AACxG,qBAAe,WAAW,SAAS,SAAS,gBAAgB;AAC5D,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,mBAAmB,SAAS,OAAO;AACjD,QAAI,CAAC,OAAO;AACV,aAAO,WAAW,wDAAwD,KAAK;AAC/E,qBAAe,WAAW,+BAA+B;AACzD,aAAO;AAAA,IACT;AAEA,mBAAe,SAAS;AACxB,WAAO;AAAA,EACT,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,qCAAqC,OAAO,IAAI,KAAK;AACvE,mBAAe,WAAW,OAAO;AACjC,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBAAoB,OAA8B;AAChE,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,MAAM,UAAU,aAAQ,MAAM,iBAAiB,EAAE;AACjF,QAAM,KAAK,yBAAyB,MAAM,cAAc,aAAQ,MAAM,qBAAqB,EAAE;AAC7F,QAAM,KAAK,oBAAoB,MAAM,gBAAgB,IAAI;AACzD,QAAM,KAAK,EAAE;AAEb,MAAI,MAAM,oBAAoB,SAAS,GAAG;AACxC,UAAM,KAAK,yBAAyB;AACpC,UAAM,oBAAoB,QAAQ,CAAC,MAAM,MAAM;AAC7C,YAAM,KAAK,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,aAAQ,KAAK,MAAM,EAAE;AAAA,IAC1D,CAAC;AACD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,sBAAsB,MAAM,cAAc,MAAM,mFAA8E;AACzI,eAAW,QAAQ,MAAM,eAAe;AACtC,YAAM,KAAK,KAAK,IAAI,EAAE;AAAA,IACxB;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,gBAAgB,SAAS,GAAG;AACpC,UAAM,KAAK,sFAAuF;AAClG,eAAW,QAAQ,MAAM,gBAAiB,OAAM,KAAK,KAAK,IAAI,EAAE;AAChE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,OAAO,MAAM,cAAe,OAAM,KAAK,KAAK,GAAG,EAAE;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,KAAK,MAAM,cAAe,OAAM,KAAK,KAAK,CAAC,EAAE;AACxD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iLAAiL;AAE5L,SAAO,MAAM,KAAK,IAAI;AACxB;",
|
|
4
|
+
"sourcesContent": ["/**\n * LLM Processor for content extraction\n * Uses any OpenAI-compatible endpoint. Reasoning effort is always 'low'.\n * Primary model exhausts its retries first; fallback model (LLM_FALLBACK_MODEL) then\n * gets up to FALLBACK_RETRY_COUNT additional attempts before the call fails.\n * NEVER throws \u2014 always returns a valid result.\n */\n\nimport OpenAI from 'openai';\nimport { LLM_EXTRACTION, getCapabilities } from '../config/index.js';\nimport {\n classifyError,\n sleep,\n ErrorCode,\n withStallProtection,\n type StructuredError,\n} from '../utils/errors.js';\nimport { mcpLog } from '../utils/logger.js';\n\n/** Maximum input characters for LLM processing (~125k tokens, sized for the larger fallback model) */\nconst MAX_LLM_INPUT_CHARS = 500_000 as const;\n\n/**\n * Maximum input characters for the primary model when it has a smaller context window.\n * Used when an input would exceed the mini model's limits so the call goes straight to fallback\n * instead of burning retries on guaranteed context_length_exceeded errors.\n */\nconst MAX_PRIMARY_MODEL_INPUT_CHARS = 100_000 as const;\n\n/** LLM client timeout in milliseconds */\nconst LLM_CLIENT_TIMEOUT_MS = 600_000 as const;\n\n/** Jitter factor for exponential backoff */\nconst BACKOFF_JITTER_FACTOR = 0.3 as const;\n\n/** Stall detection timeout \u2014 abort if no response in this time */\nconst LLM_STALL_TIMEOUT_MS = 75_000 as const;\n\n/** Hard request deadline for LLM calls */\nconst LLM_REQUEST_DEADLINE_MS = 150_000 as const;\n\n// ============================================================================\n// LLM health tracking \u2014 surfaced via health://status so capability-aware\n// clients can branch on degraded mode without parsing per-call footers.\n// ============================================================================\n\ntype LLMHealthKind = 'planner' | 'extractor';\n\nexport interface LLMHealthSnapshot {\n readonly lastPlannerOk: boolean;\n readonly lastExtractorOk: boolean;\n readonly lastPlannerCheckedAt: string | null;\n readonly lastExtractorCheckedAt: string | null;\n readonly lastPlannerError: string | null;\n readonly lastExtractorError: string | null;\n readonly plannerConfigured: boolean;\n readonly extractorConfigured: boolean;\n /** Failures since the last success. Reset to 0 on `markLLMSuccess`. */\n readonly consecutivePlannerFailures: number;\n readonly consecutiveExtractorFailures: number;\n}\n\nconst llmHealth = {\n lastPlannerOk: false,\n lastExtractorOk: false,\n lastPlannerCheckedAt: null as string | null,\n lastExtractorCheckedAt: null as string | null,\n lastPlannerError: null as string | null,\n lastExtractorError: null as string | null,\n consecutivePlannerFailures: 0,\n consecutiveExtractorFailures: 0,\n};\n\nexport function markLLMSuccess(kind: LLMHealthKind): void {\n const ts = new Date().toISOString();\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = true;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = null;\n llmHealth.consecutivePlannerFailures = 0;\n } else {\n llmHealth.lastExtractorOk = true;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutiveExtractorFailures = 0;\n }\n}\n\nexport function markLLMFailure(kind: LLMHealthKind, err: unknown): void {\n const ts = new Date().toISOString();\n const message = err instanceof Error ? err.message : String(err ?? 'unknown error');\n if (kind === 'planner') {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastPlannerCheckedAt = ts;\n llmHealth.lastPlannerError = message;\n llmHealth.consecutivePlannerFailures += 1;\n } else {\n llmHealth.lastExtractorOk = false;\n llmHealth.lastExtractorCheckedAt = ts;\n llmHealth.lastExtractorError = message;\n llmHealth.consecutiveExtractorFailures += 1;\n }\n}\n\nexport function getLLMHealth(): LLMHealthSnapshot {\n const cap = getCapabilities();\n return {\n lastPlannerOk: llmHealth.lastPlannerOk,\n lastExtractorOk: llmHealth.lastExtractorOk,\n lastPlannerCheckedAt: llmHealth.lastPlannerCheckedAt,\n lastExtractorCheckedAt: llmHealth.lastExtractorCheckedAt,\n lastPlannerError: llmHealth.lastPlannerError,\n lastExtractorError: llmHealth.lastExtractorError,\n // Static capability \u2014 based on env presence at boot. Runtime health (above)\n // tells whether the last attempt actually succeeded.\n plannerConfigured: cap.llmExtraction,\n extractorConfigured: cap.llmExtraction,\n consecutivePlannerFailures: llmHealth.consecutivePlannerFailures,\n consecutiveExtractorFailures: llmHealth.consecutiveExtractorFailures,\n };\n}\n\n/** Test-only \u2014 reset state between tests. Not exported from index. */\nexport function _resetLLMHealthForTests(): void {\n llmHealth.lastPlannerOk = false;\n llmHealth.lastExtractorOk = false;\n llmHealth.lastPlannerCheckedAt = null;\n llmHealth.lastExtractorCheckedAt = null;\n llmHealth.lastPlannerError = null;\n llmHealth.lastExtractorError = null;\n llmHealth.consecutivePlannerFailures = 0;\n llmHealth.consecutiveExtractorFailures = 0;\n}\n\ninterface ProcessingConfig {\n readonly enabled: boolean;\n readonly extract: string | undefined;\n readonly url?: string;\n}\n\ninterface LLMResult {\n readonly content: string;\n readonly processed: boolean;\n readonly error?: string;\n readonly errorDetails?: StructuredError;\n}\n\n// LLM-specific retry configuration\nconst LLM_RETRY_CONFIG = {\n maxRetries: 2,\n baseDelayMs: 1000,\n maxDelayMs: 5000,\n} as const;\n\n/** Number of additional attempts using the fallback model after primary exhausts. */\nconst FALLBACK_RETRY_COUNT = 3 as const;\n\n// OpenAI-compatible retryable error codes (using Set for type-safe lookup)\nconst RETRYABLE_LLM_ERROR_CODES = new Set([\n 'rate_limit_exceeded',\n 'server_error',\n 'timeout',\n 'service_unavailable',\n]);\n\n/** Type guard for errors with an HTTP status code */\nfunction hasStatus(error: unknown): error is { status: number } {\n return (\n typeof error === 'object' &&\n error !== null &&\n 'status' in error &&\n typeof (error as Record<string, unknown>).status === 'number'\n );\n}\n\nlet llmClient: OpenAI | null = null;\n\ntype OpenAITextGenerator = Pick<OpenAI, 'chat'>;\n\nexport function createLLMProcessor(): OpenAI | null {\n if (!getCapabilities().llmExtraction) return null;\n\n if (!llmClient) {\n llmClient = new OpenAI({\n baseURL: LLM_EXTRACTION.BASE_URL,\n apiKey: LLM_EXTRACTION.API_KEY,\n timeout: LLM_CLIENT_TIMEOUT_MS,\n maxRetries: 0,\n defaultHeaders: { 'X-Title': 'mcp-research-powerpack' },\n });\n mcpLog('info', `LLM extraction configured (model: ${LLM_EXTRACTION.MODEL}, baseURL: ${LLM_EXTRACTION.BASE_URL})`, 'llm');\n }\n return llmClient;\n}\n\nfunction buildChatRequestBody(model: string, prompt: string): Record<string, unknown> {\n return {\n model,\n messages: [{ role: 'user', content: prompt }],\n reasoning_effort: 'low',\n };\n}\n\nexport async function requestText(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n modelOverride?: string,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const model = modelOverride || LLM_EXTRACTION.MODEL;\n\n try {\n const response = await withStallProtection(\n (stallSignal) => processor.chat.completions.create(\n buildChatRequestBody(model, prompt) as unknown as OpenAI.ChatCompletionCreateParamsNonStreaming,\n {\n signal: signal ? AbortSignal.any([stallSignal, signal]) : stallSignal,\n timeout: LLM_REQUEST_DEADLINE_MS,\n },\n ),\n LLM_STALL_TIMEOUT_MS,\n 3,\n `${operationLabel} (${model})`,\n );\n\n const content = response.choices?.[0]?.message?.content?.trim();\n if (content) {\n return { content, model };\n }\n\n const err = `Empty response from model ${model}`;\n mcpLog('warning', `${operationLabel} returned empty content for model ${model}`, 'llm');\n return { content: null, model, error: err };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `${operationLabel} failed for model ${model}: ${message}`, 'llm');\n return { content: null, model, error: message };\n }\n}\n\n/**\n * Single LLM call with automatic fallback model.\n * Tries the primary model once; if it fails and LLM_FALLBACK_MODEL is set,\n * retries up to FALLBACK_RETRY_COUNT times on the fallback model.\n * Used for single-shot calls (classify, brief, refine queries).\n */\nexport async function requestTextWithFallback(\n processor: OpenAITextGenerator,\n prompt: string,\n operationLabel: string,\n signal?: AbortSignal,\n): Promise<{ content: string | null; model: string; error?: string }> {\n const primary = await requestText(processor, prompt, operationLabel, signal);\n if (primary.content) return primary;\n\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (!fallbackModel) return primary;\n\n mcpLog('warning', `Primary model failed, switching to fallback ${fallbackModel}`, 'llm');\n\n let lastError = primary.error;\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n const result = await requestText(processor, prompt, `${operationLabel} [fallback]`, signal, fallbackModel);\n if (result.content) return result;\n lastError = result.error;\n }\n\n return { content: null, model: fallbackModel, error: lastError };\n}\n\n/**\n * Check if an LLM error is retryable\n */\nfunction isRetryableLLMError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n // Stall/timeout protection errors - always retry these\n const stallCode = (error as { code?: string })?.code;\n if (stallCode === 'ESTALLED' || stallCode === 'ETIMEDOUT') {\n return true;\n }\n\n // Check HTTP status codes\n if (hasStatus(error)) {\n if (error.status === 429 || error.status === 500 || error.status === 502 || error.status === 503 || error.status === 504) {\n return true;\n }\n }\n\n // Check error codes from the OpenAI-compatible endpoint\n const record = error as Record<string, unknown>;\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n const errorCode =\n code ??\n (nested && typeof nested.code === 'string' ? nested.code : undefined) ??\n (nested && typeof nested.type === 'string' ? nested.type : undefined);\n if (errorCode && RETRYABLE_LLM_ERROR_CODES.has(errorCode)) {\n return true;\n }\n\n // Check message for common patterns\n const message = typeof record.message === 'string' ? record.message.toLowerCase() : '';\n if (\n message.includes('rate limit') ||\n message.includes('timeout') ||\n message.includes('timed out') ||\n message.includes('service unavailable') ||\n message.includes('server error') ||\n message.includes('connection') ||\n message.includes('econnreset')\n ) {\n return true;\n }\n\n return false;\n}\n\n/**\n * Detect \"the prompt is too long for this model\" errors.\n * These are NOT retryable on the same model \u2014 we should skip remaining primary retries\n * and go straight to the fallback model (which has a larger context window).\n */\nfunction isContextWindowError(error: unknown): boolean {\n if (!error || typeof error !== 'object') return false;\n\n const record = error as Record<string, unknown>;\n const nested =\n typeof record.error === 'object' && record.error !== null\n ? (record.error as Record<string, unknown>)\n : null;\n\n const code = typeof record.code === 'string' ? record.code : undefined;\n const nestedCode = nested && typeof nested.code === 'string' ? nested.code : undefined;\n if (code === 'context_length_exceeded' || nestedCode === 'context_length_exceeded') {\n return true;\n }\n\n const messages: string[] = [];\n if (typeof record.message === 'string') messages.push(record.message);\n if (nested && typeof nested.message === 'string') messages.push(nested.message);\n const combined = messages.join(' ').toLowerCase();\n return (\n combined.includes('context length') ||\n combined.includes('context window') ||\n combined.includes('maximum context') ||\n combined.includes('maximum tokens') ||\n combined.includes('token limit') ||\n combined.includes('too many tokens') ||\n combined.includes('prompt is too long') ||\n combined.includes('reduce the length')\n );\n}\n\n/**\n * Calculate backoff delay with jitter for LLM retries\n */\nfunction calculateLLMBackoff(attempt: number): number {\n const exponentialDelay = LLM_RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt);\n const jitter = Math.random() * BACKOFF_JITTER_FACTOR * exponentialDelay;\n return Math.min(exponentialDelay + jitter, LLM_RETRY_CONFIG.maxDelayMs);\n}\n\n/**\n * Process content with LLM extraction\n * NEVER throws - always returns a valid LLMResult\n * Implements retry logic with exponential backoff for transient failures\n */\nexport async function processContentWithLLM(\n content: string,\n config: ProcessingConfig,\n processor?: OpenAI | null,\n signal?: AbortSignal\n): Promise<LLMResult> {\n // Early returns for invalid/skip conditions\n if (!config.enabled) {\n return { content, processed: false };\n }\n\n if (!processor) {\n return {\n content,\n processed: false,\n error: 'LLM processor not available (LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL must all be set)',\n errorDetails: {\n code: ErrorCode.AUTH_ERROR,\n message: 'LLM processor not available',\n retryable: false,\n },\n };\n }\n\n if (!content?.trim()) {\n return { content: content || '', processed: false, error: 'Empty content provided' };\n }\n\n // Truncate extremely long content to avoid blowing past even the fallback model's context.\n const truncatedContent = content.length > MAX_LLM_INPUT_CHARS\n ? content.substring(0, MAX_LLM_INPUT_CHARS) + '\\n\\n[Content truncated due to length]'\n : content;\n\n // If the prompt would exceed the primary (mini) model's smaller context window,\n // skip it entirely and go straight to the fallback model. Saves burning retries\n // on guaranteed context_length_exceeded errors.\n const skipPrimaryForSize =\n truncatedContent.length > MAX_PRIMARY_MODEL_INPUT_CHARS && !!LLM_EXTRACTION.FALLBACK_MODEL;\n\n // Sanitize URL before sending to LLM: drop query string and fragment\n // so signed URLs, session tokens, auth params, or tracking hashes never\n // land in a third-party LLM prompt. Keep origin + path for page-type classification.\n const safeUrl = (() => {\n if (!config.url) return undefined;\n try {\n const u = new URL(config.url);\n return `${u.origin}${u.pathname}`;\n } catch {\n return undefined;\n }\n })();\n const urlLine = safeUrl ? `PAGE URL: ${safeUrl}\\n\\n` : '';\n\n const prompt = config.extract\n ? `You are a factual extractor for a research agent. Extract ONLY the information that matches the instruction below. Do not summarize, interpret, or editorialize.\n\n${urlLine}EXTRACTION INSTRUCTION: ${config.extract}\n\nSTEP 1 \u2014 Classify this page. Look at the URL if present, plus structural cues (code blocks, table patterns, comment threads, marketing copy). Pick ONE:\n\\`docs | changelog | github-readme | github-thread | reddit | hackernews | forum | blog | marketing | announcement | qa | cve | paper | release-notes | other\\`\n\nSTEP 2 \u2014 Adjust emphasis by page type:\n- docs / changelog / github-readme / release-notes \u2192 API signatures, version numbers, flags, exact config keys, code blocks. Copy verbatim. Preserve tables as tables.\n- github-thread \u2192 weight MAINTAINER comments (label \"[maintainer]\") over drive-by commenters. Preserve stacktraces verbatim. Capture chronological resolution \u2014 what was decided and when. Link the accepted-fix commit/PR if referenced.\n- reddit / hackernews / forum \u2192 lived experience. Quote verbatim with attribution (\"u/foo wrote: \u2026\" or \"user <name>\"). Prioritize replies with stack details, specific failure stories, or replies that contradict the OP. Record overall sentiment distribution as one bullet if clear skew (\"~70% agree / ~20% dissent / rest off-topic\"). Drop context-free opinions (\"this sucks\") from Matches.\n- blog \u2192 prioritize concrete reproductions, code, measurements. If the author makes a claim without evidence, mark \"[unsourced claim]\".\n- marketing / announcement \u2192 pricing tiers, feature matrices verbatim, free-tier quotas, enterprise contact. Preserve tables as tables. Treat roadmap/future-tense claims skeptically \u2014 note them as \"[announced, not shipped]\" when framing is future-tense.\n- qa (stackoverflow) \u2192 accepted answer's code + high-voted disagreements. Always note the answer date \u2014 SO rots.\n- cve \u2192 CVSS vector verbatim, CWE, CPE ranges, affected versions, fix version, references. Each with its label.\n- paper \u2192 claim, method, dataset, benchmark numbers, comparison baseline. Preserve numeric deltas verbatim.\n\nSTEP 3 \u2014 Emit markdown with these sections, in order:\n\n## Source\n- URL: <verbatim if visible, else \"unknown\">\n- Page type: <the type you picked>\n- Page date: <verbatim if visible, else \"not visible\">\n- Author / maintainer (if identifiable): <verbatim>\n\n## Matches\nOne bullet per distinct piece of matching info:\n- **<short label>** \u2014 the information. Quote VERBATIM for: numbers, versions, dates, API names, prices, error messages, stacktraces, CVSS vectors, benchmark scores, command flags, proper nouns, and people's words. Backticks for code/identifiers. Preserve tables.\n\n## Not found\nEvery part of the extraction instruction this page did NOT answer. Be explicit. Example: \"Enterprise pricing contact \u2014 not present on this page.\"\n\n## Follow-up signals\nShort bullets \u2014 NEW angles this page surfaced that the agent should investigate. Include: new terms, unexpected vendor names, contradicting claims, referenced-but-unscraped URLs. Copy URLs VERBATIM from the source; if only anchor text is visible, write \"anchor: <text> (URL not in scraped content)\". Skip this section if nothing new surfaced. Do NOT invent.\n\n## Contradictions\n(Include this section only if the page contains internally contradictory claims.) Bullet each contradiction with both sides quoted verbatim.\n\n## Truncation\n(Include only if content appears cut mid-element.) \"Content cut mid-<table row / code block / comment / paragraph>; extraction may be incomplete for <section>.\"\n\nRULES:\n- Never paraphrase numbers, versions, code, or quoted text.\n- If an instruction item is not answered, it goes in \"Not found\" \u2014 do NOT invent an answer to please the caller.\n- Preserve code blocks, command examples, tables exactly.\n- Do NOT add commentary or recommendations outside \"Follow-up signals\".\n- Page language \u2260 English: quote verbatim in the original language AND provide a parenthetical gloss in English.\n- Page appears gated (login wall, paywall, JS-render-empty shell) or near-empty: BEFORE dismissing the page, look for ANY visible text \u2014 og:title, og:description, meta description, headline, author name, nav labels, teaser/preview sentences, visible comment snippets. If ANY such text exists, extract it as usual under \\`## Source\\` + \\`## Matches\\`, and list the blocked facets under \\`## Not found\\`. Prefix the first \\`## Matches\\` bullet with \\`**[partial \u2014 <reason>]**\\` so the caller knows the body is gated (reasons: \\`login-wall | paywall | JS-render-empty | truncated-before-relevant-section\\`). ONLY when there is NO visible extractable text at all (< 50 words AND no og:* AND no headline AND no preview), return exactly one line:\n \\`## Matches\\\\n_Page did not load: <reason>_\\`\n Valid reasons: \\`404 | login-wall | paywall | JS-render-empty | non-text-asset | truncated-before-relevant-section\\`.\n\nContent:\n${truncatedContent}`\n : `Clean the following page content: drop navigation, ads, cookie banners, footers, author bios, related-article lists. Preserve headings, paragraphs, code blocks, tables, and inline links as \\`[text](url)\\`. Do NOT summarize \u2014 preserve the full body.\n\n${urlLine}Content:\n${truncatedContent}`;\n\n let lastError: StructuredError | undefined;\n\n // Phase 1: primary model with up to LLM_RETRY_CONFIG.maxRetries retries.\n // Skip entirely when the input is too big for the primary's context window.\n if (skipPrimaryForSize) {\n mcpLog(\n 'info',\n `Input ${truncatedContent.length} chars exceeds primary model cap (${MAX_PRIMARY_MODEL_INPUT_CHARS}); routing directly to fallback`,\n 'llm',\n );\n } else {\n for (let attempt = 0; attempt <= LLM_RETRY_CONFIG.maxRetries; attempt++) {\n try {\n if (attempt === 0) {\n mcpLog('info', `Starting extraction with ${LLM_EXTRACTION.MODEL}`, 'llm');\n } else {\n mcpLog('warning', `Retry attempt ${attempt}/${LLM_RETRY_CONFIG.maxRetries}`, 'llm');\n }\n\n const response = await requestText(processor, prompt, 'LLM extraction', signal);\n\n if (response.content) {\n mcpLog('info', `Successfully extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n\n // Empty response \u2014 not retryable\n mcpLog('warning', 'Received empty response from LLM', 'llm');\n markLLMFailure('extractor', 'LLM returned empty response');\n return {\n content,\n processed: false,\n error: 'LLM returned empty response',\n errorDetails: {\n code: ErrorCode.INTERNAL_ERROR,\n message: 'LLM returned empty response',\n retryable: false,\n },\n };\n\n } catch (err: unknown) {\n lastError = classifyError(err);\n const status = hasStatus(err) ? err.status : undefined;\n const code = typeof err === 'object' && err !== null && 'code' in err\n ? String((err as Record<string, unknown>).code)\n : undefined;\n const ctxErr = isContextWindowError(err);\n mcpLog('error', `Error (attempt ${attempt + 1}): ${lastError.message} [status=${status}, code=${code}, retryable=${isRetryableLLMError(err)}, context_window=${ctxErr}]`, 'llm');\n\n // Context window errors are not retryable on the same model \u2014 jump to fallback.\n if (ctxErr) {\n mcpLog('warning', 'Context window exceeded on primary \u2014 skipping remaining retries, routing to fallback', 'llm');\n break;\n }\n\n if (isRetryableLLMError(err) && attempt < LLM_RETRY_CONFIG.maxRetries) {\n const delayMs = calculateLLMBackoff(attempt);\n mcpLog('warning', `Retrying in ${delayMs}ms...`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n continue;\n }\n break;\n }\n }\n }\n\n // Phase 2: fallback model \u2014 FALLBACK_RETRY_COUNT attempts before giving up\n const fallbackModel = LLM_EXTRACTION.FALLBACK_MODEL;\n if (fallbackModel) {\n mcpLog('warning', `Primary exhausted, switching to fallback ${fallbackModel}`, 'llm');\n for (let attempt = 0; attempt < FALLBACK_RETRY_COUNT; attempt++) {\n if (attempt > 0) {\n const delayMs = calculateLLMBackoff(attempt - 1);\n mcpLog('warning', `Fallback retry ${attempt}/${FALLBACK_RETRY_COUNT - 1} in ${delayMs}ms`, 'llm');\n try { await sleep(delayMs, signal); } catch { break; }\n }\n try {\n const response = await requestText(processor, prompt, 'LLM extraction [fallback]', signal, fallbackModel);\n if (response.content) {\n mcpLog('info', `Fallback extracted ${response.content.length} characters`, 'llm');\n markLLMSuccess('extractor');\n return { content: response.content, processed: true };\n }\n mcpLog('warning', 'Fallback returned empty response', 'llm');\n break;\n } catch (err: unknown) {\n lastError = classifyError(err);\n mcpLog('error', `Fallback error (attempt ${attempt + 1}): ${lastError.message}`, 'llm');\n }\n }\n }\n\n const errorMessage = lastError?.message || 'Unknown LLM error';\n mcpLog('error', `All attempts failed: ${errorMessage}. Returning original content.`, 'llm');\n markLLMFailure('extractor', errorMessage);\n\n return {\n content,\n processed: false,\n error: `LLM extraction failed: ${errorMessage}`,\n errorDetails: lastError || {\n code: ErrorCode.UNKNOWN_ERROR,\n message: errorMessage,\n retryable: false,\n },\n };\n}\n\n// ============================================================================\n// Web-Search Result Classification\n// ============================================================================\n\n/** Maximum URLs to send to the LLM for classification */\nconst MAX_CLASSIFICATION_URLS = 50 as const;\n\n/** Classification tiers */\ntype ClassificationTier = 'HIGHLY_RELEVANT' | 'MAYBE_RELEVANT' | 'OTHER';\n\nexport interface ClassificationEntry {\n readonly rank: number;\n readonly tier: ClassificationTier;\n readonly source_type?: string;\n readonly reason?: string;\n}\n\nexport interface ClassificationGap {\n readonly id: number;\n readonly description: string;\n}\n\nexport interface ClassificationResult {\n readonly title: string;\n readonly synthesis: string;\n readonly results: ClassificationEntry[];\n readonly refine_queries?: Array<{\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n }>;\n readonly confidence?: 'high' | 'medium' | 'low';\n readonly confidence_reason?: string;\n readonly gaps?: ClassificationGap[];\n}\n\nexport interface RefineQuerySuggestion {\n readonly query: string;\n readonly rationale: string;\n readonly gap_id?: number;\n readonly gap_description?: string;\n}\n\n/**\n * Classify web-search results by relevance to an objective using the LLM.\n * Sends only titles, snippets, and domain names \u2014 does NOT fetch URLs.\n * Returns null on failure (caller should fall back to raw output).\n */\nexport async function classifySearchResults(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n readonly snippet: string;\n readonly frequency: number;\n readonly queries: string[];\n }>,\n objective: string,\n totalQueries: number,\n processor: OpenAI,\n previousQueries: readonly string[] = [],\n): Promise<{ result: ClassificationResult | null; error?: string }> {\n const urlsToClassify = rankedUrls.slice(0, MAX_CLASSIFICATION_URLS);\n\n // Descending static weights fed to the LLM. Higher-ranked URLs get a bigger\n // weight so the classifier biases HIGHLY_RELEVANT toward them. The weights\n // here are a shown-to-LLM summary, not the internal CTR ranking (which\n // still runs in url-aggregator.ts). Rank 11+ all bucket to w=1.\n const STATIC_WEIGHTS = [30, 20, 15, 10, 8, 6, 5, 4, 3, 2] as const;\n const weightForRank = (rank: number): number => STATIC_WEIGHTS[rank - 1] ?? 1;\n\n // Build compressed result list \u2014 weight + title + domain + snippet (truncated)\n const lines: string[] = [];\n for (const url of urlsToClassify) {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n const snippet = url.snippet.length > 120\n ? url.snippet.slice(0, 117) + '...'\n : url.snippet;\n lines.push(`[${url.rank}] w=${weightForRank(url.rank)} ${url.title} \u2014 ${domain} \u2014 ${snippet}`);\n }\n\n const prevQueriesBlock = previousQueries.length > 0\n ? previousQueries.map((q) => `- ${q}`).join('\\n')\n : '- (none provided)';\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are the relevance filter for a research agent. Classify each search result below against the objective and produce a structured analysis.\n\nOBJECTIVE: ${objective}\nTODAY: ${today}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase in refine_queries):\n${prevQueriesBlock}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"title\": \"2\u20138 word label for this RESULT CLUSTER (not the objective)\",\n \"synthesis\": \"3\u20135 sentences grounded in the results. Every non-trivial claim cites a rank in [brackets], e.g. '[3] documents the flag; [7][12] report it is broken on macOS.' A synthesis with zero citations is invalid.\",\n \"confidence\": \"high | medium | low\",\n \"confidence_reason\": \"one sentence \u2014 why\",\n \"gaps\": [\n { \"id\": 0, \"description\": \"specific, actionable thing the current results do NOT answer \u2014 not 'more info needed'\" }\n ],\n \"refine_queries\": [\n { \"query\": \"concrete next search\", \"gap_id\": 0, \"rationale\": \"\u226412 words\" }\n ],\n \"results\": [\n {\n \"rank\": 1,\n \"tier\": \"HIGHLY_RELEVANT | MAYBE_RELEVANT | OTHER\",\n \"source_type\": \"vendor_doc | github | reddit | hackernews | blog | news | marketing | stackoverflow | cve | paper | release_notes | aggregator | other\",\n \"reason\": \"\u226412 words citing the snippet cue that drove the tier\"\n }\n ]\n}\n\nWEIGHT SCHEME: each row is prefixed with a weight (w=N). Higher weight means the URL ranked better across input queries \u2014 prefer HIGHLY_RELEVANT for high-weight rows when content matches the objective. Weight alone never justifies HIGHLY_RELEVANT; snippet cues still drive the decision.\n\nSOURCE-OF-TRUTH RUBRIC (the \"primary source\" is goal-dependent \u2014 infer goal type from the objective):\n- spec / API / config questions \u2192 vendor_doc, github (README, RFC), release_notes are primary\n- bug / failure-mode questions \u2192 github (issue/PR), stackoverflow are primary\n- migration / sentiment / lived-experience \u2192 reddit, hackernews, blog are primary; docs are secondary\n- pricing / commercial \u2192 marketing (the vendor's own pricing page IS the primary source, but treat feature lists skeptically)\n- security / CVE \u2192 cve databases, distro security trackers (nvd.nist.gov, security-tracker.debian.org, ubuntu.com/security) are primary\n- synthesis / open-ended \u2192 blend; no single type is primary\n- product launch \u2192 vendor_doc + news + marketing for the launch itself; blogs + reddit for independent verification\n\nFRESHNESS: proportional to topic velocity. For a week-old release, demote anything older than 30 days. For general tech questions, demote older than 18 months. For stable protocols (HTTP, TCP, POSIX), don't demote by age.\n\nCONFIDENCE:\n- high = \u22653 HIGHLY_RELEVANT results from INDEPENDENT domains agree on the core answer\n- medium = \u22652 HIGHLY_RELEVANT exist but disagree or share a domain; OR a single authoritative primary source answers it\n- low = otherwise; snippet-only judgments cap at medium\n\nREFINE QUERIES \u2014 each MUST differ from every previousQuery by:\n- a new operator (site:, quotes, verbatim version number), OR\n- a domain-specific noun ABSENT from every prior query\nAdding a year alone does NOT count as differentiation.\nEach refine_query MUST reference a specific gap_id from the gaps array above.\nProduce 4\u20138 refine_queries total. Cover: (a) a primary-source probe, (b) a temporal sharpener, (c) a failure-mode or comparison probe, (d) at least one new-term probe seeded by a specific result's snippet.\n\nRULES:\n- Classify ALL ${urlsToClassify.length} results. Do not skip or collapse any.\n- Use only the three tier values.\n- Judge from title + domain + snippet only. Do NOT invent facts not present in the snippet.\n- If ALL results are OTHER: synthesis = \"\", confidence = \"low\", and \\`gaps\\` must explicitly state why the current queries missed the target.\n- Casing: tier = UPPERCASE_WITH_UNDERSCORES, confidence = lowercase.\n\nSEARCH RESULTS (${urlsToClassify.length} URLs from ${totalQueries} queries):\n${lines.join('\\n')}`;\n\n try {\n mcpLog('info', `Classifying ${urlsToClassify.length} URLs against objective`, 'llm');\n\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Search classification',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty classification response';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n // Strip markdown code fences if present\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as ClassificationResult;\n\n // Validate the response shape.\n // Note: synthesis is typed not truthy \u2014 the prompt explicitly instructs an empty string\n // for the all-OTHER case, and we must not reject that.\n if (!parsed.title || typeof parsed.synthesis !== 'string' || !Array.isArray(parsed.results)) {\n const errMsg = 'LLM response missing required fields (title, synthesis, results)';\n markLLMFailure('planner', errMsg);\n return { result: null, error: errMsg };\n }\n\n mcpLog('info', `Classification complete: ${parsed.results.filter(r => r.tier === 'HIGHLY_RELEVANT').length} highly relevant`, 'llm');\n markLLMSuccess('planner');\n return { result: parsed };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Classification failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: null, error: `Classification failed: ${message}` };\n }\n}\n\nexport async function suggestRefineQueriesForRawMode(\n rankedUrls: ReadonlyArray<{\n readonly rank: number;\n readonly url: string;\n readonly title: string;\n }>,\n objective: string,\n originalQueries: readonly string[],\n processor: OpenAI,\n): Promise<{ result: RefineQuerySuggestion[]; error?: string }> {\n const urlsToSummarize = rankedUrls.slice(0, 12);\n const lines = urlsToSummarize.map((url) => {\n let domain: string;\n try {\n domain = new URL(url.url).hostname.replace(/^www\\./, '');\n } catch {\n domain = url.url;\n }\n return `[${url.rank}] ${url.title} \u2014 ${domain}`;\n });\n\n const prompt = `You are generating follow-up search queries for an agent using raw web-search results.\n\nReturn ONLY a JSON object (no markdown, no code fences):\n{\n \"refine_queries\": [\n { \"query\": \"next search query\", \"gap_description\": \"what gap this closes\", \"rationale\": \"\u226412 words on why\" }\n ]\n}\n\nOBJECTIVE: ${objective}\n\nPREVIOUS QUERIES (already run \u2014 do NOT paraphrase):\n${originalQueries.map((query) => `- ${query}`).join('\\n')}\n\nTOP RESULT TITLES (to seed new-term probes):\n${lines.join('\\n')}\n\nRULES:\n- Produce 4\u20136 diverse follow-ups. Cover: (a) a primary-source probe (site:, RFC, vendor docs); (b) a temporal sharpener (changelog, version number); (c) a failure-mode or comparison probe; (d) at least one new-term probe seeded by a specific result title.\n- Each query MUST differ from every previousQuery by either a new operator (site:, quotes, a verbatim version number) OR a domain-specific noun absent from every prior query. Adding a year alone does NOT count.\n- Each refine_query MUST include a \\`gap_description\\` naming what the current results don't answer.\n- Do not include URLs.\n- Keep rationales \u226412 words.`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Raw-mode refine query generation',\n );\n\n if (!response.content) {\n const errMsg = response.error ?? 'LLM returned empty raw-mode refine query response';\n markLLMFailure('planner', errMsg);\n return { result: [], error: errMsg };\n }\n\n const cleaned = response.content.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as { refine_queries?: RefineQuerySuggestion[] };\n\n markLLMSuccess('planner');\n return { result: Array.isArray(parsed.refine_queries) ? parsed.refine_queries : [] };\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('error', `Raw-mode refine query generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return { result: [], error: message };\n }\n}\n\n// ============================================================================\n// Research Brief \u2014 goal-aware orientation (called by start-research)\n// ============================================================================\n\nexport type PrimaryBranch = 'reddit' | 'web' | 'both';\n\nexport interface ResearchBriefStep {\n readonly tool: 'web-search' | 'scrape-links';\n readonly reason: string;\n}\n\nexport interface ResearchBrief {\n readonly goal_class: string;\n readonly goal_class_reason: string;\n readonly primary_branch: PrimaryBranch;\n readonly primary_branch_reason: string;\n readonly freshness_window: string;\n readonly first_call_sequence: readonly ResearchBriefStep[];\n readonly keyword_seeds: readonly string[];\n readonly iteration_hints: readonly string[];\n readonly gaps_to_watch: readonly string[];\n readonly stop_criteria: readonly string[];\n}\n\nconst VALID_GOAL_CLASSES = new Set([\n 'spec', 'bug', 'migration', 'sentiment', 'pricing', 'security',\n 'synthesis', 'product_launch', 'other',\n]);\n\nconst VALID_FRESHNESS = new Set(['days', 'weeks', 'months', 'years']);\nconst VALID_BRANCHES = new Set<PrimaryBranch>(['reddit', 'web', 'both']);\nconst VALID_STEP_TOOLS = new Set(['web-search', 'scrape-links']);\n\nfunction isStringArray(value: unknown): value is string[] {\n return Array.isArray(value) && value.every((v) => typeof v === 'string');\n}\n\nfunction isStepArray(value: unknown): value is ResearchBriefStep[] {\n return Array.isArray(value) && value.every((s) => {\n if (typeof s !== 'object' || s === null) return false;\n const tool = (s as Record<string, unknown>).tool;\n const reason = (s as Record<string, unknown>).reason;\n return typeof tool === 'string'\n && VALID_STEP_TOOLS.has(tool)\n && typeof reason === 'string'\n && reason.trim().length > 0;\n });\n}\n\nexport function parseResearchBrief(raw: string): ResearchBrief | null {\n try {\n const cleaned = raw.replace(/^```(?:json)?\\s*\\n?/m, '').replace(/\\n?```\\s*$/m, '').trim();\n const parsed = JSON.parse(cleaned) as Record<string, unknown>;\n\n const goal_class = typeof parsed.goal_class === 'string' ? parsed.goal_class : null;\n if (!goal_class || !VALID_GOAL_CLASSES.has(goal_class)) return null;\n\n const freshness_window = typeof parsed.freshness_window === 'string' ? parsed.freshness_window : null;\n if (!freshness_window || !VALID_FRESHNESS.has(freshness_window)) return null;\n\n const primary_branch = parsed.primary_branch;\n if (typeof primary_branch !== 'string' || !VALID_BRANCHES.has(primary_branch as PrimaryBranch)) return null;\n\n if (!isStepArray(parsed.first_call_sequence) || parsed.first_call_sequence.length === 0) return null;\n if (!isStringArray(parsed.keyword_seeds) || parsed.keyword_seeds.length === 0) return null;\n\n return {\n goal_class,\n goal_class_reason: typeof parsed.goal_class_reason === 'string' ? parsed.goal_class_reason : '',\n primary_branch: primary_branch as PrimaryBranch,\n primary_branch_reason: typeof parsed.primary_branch_reason === 'string' ? parsed.primary_branch_reason : '',\n freshness_window,\n first_call_sequence: parsed.first_call_sequence,\n keyword_seeds: parsed.keyword_seeds.filter((s) => s.trim().length > 0),\n iteration_hints: isStringArray(parsed.iteration_hints) ? parsed.iteration_hints : [],\n gaps_to_watch: isStringArray(parsed.gaps_to_watch) ? parsed.gaps_to_watch : [],\n stop_criteria: isStringArray(parsed.stop_criteria) ? parsed.stop_criteria : [],\n };\n } catch {\n return null;\n }\n}\n\nexport async function generateResearchBrief(\n goal: string,\n processor: OpenAI,\n signal?: AbortSignal,\n): Promise<ResearchBrief | null> {\n const today = new Date().toISOString().slice(0, 10);\n\n const prompt = `You are a research planner. An agent is about to run a multi-pass research loop on the goal below using 3 tools:\n\n - web-search: fan-out Google, scope: web|reddit|both, up to 50 queries per call, parallel-callable (multiple calls per turn)\n - scrape-links: fetch URLs in parallel, auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post+comments); all other URLs \u2192 HTTP scraper; parallel-callable\n\nProduce a tailored JSON brief.\n\nGOAL: ${goal}\nTODAY: ${today}\n\nReturn ONLY a JSON object (no markdown, no code fences):\n\n{\n \"goal_class\": \"spec | bug | migration | sentiment | pricing | security | synthesis | product_launch | other\",\n \"goal_class_reason\": \"one sentence \u2014 why this class\",\n \"primary_branch\": \"reddit | web | both\",\n \"primary_branch_reason\": \"one sentence \u2014 why this branch leads\",\n \"freshness_window\": \"days | weeks | months | years\",\n \"first_call_sequence\": [\n { \"tool\": \"web-search | scrape-links\", \"reason\": \"what this call establishes for the agent\" }\n ],\n \"keyword_seeds\": [\"25\u201350 concrete Google queries \u2014 flat list, to be fired in the first web-search call\"],\n \"iteration_hints\": [\"2\u20135 pointers on which harvested terms / follow-up signals to watch for after pass 1\"],\n \"gaps_to_watch\": [\"2\u20135 concrete questions the agent MUST verify or the answer is incomplete\"],\n \"stop_criteria\": [\"2\u20134 checkable conditions \u2014 all must hold before the agent declares done\"]\n}\n\nRULES:\n\nprimary_branch:\n- \"reddit\" \u2192 sentiment / migration / lived-experience / community-consensus goals. Leads with scope:\"reddit\" web-search.\n- \"web\" \u2192 spec / bug / pricing / CVE / API / primary-source goals. Leads with scope:\"web\" web-search.\n- \"both\" \u2192 opinion-heavy AND needs official sources (e.g. product launch + practitioner reception).\n\nfirst_call_sequence:\n- 1\u20133 steps.\n- reddit-first: step 1 = web-search (caller sets scope:\"reddit\"), step 2 = scrape-links on best post permalinks.\n- web-first: step 1 = web-search (scope:\"web\"), step 2 = scrape-links on HIGHLY_RELEVANT URLs.\n- both: step 1 = two parallel web-search calls (one scope:\"reddit\", one scope:\"web\"), step 2 = merged scrape-links.\n\nkeyword_seeds:\n- 25\u201350 total. Narrow bug \u2192 fewer. Open synthesis \u2192 more.\n- Use operators where helpful (site:, quotes, verbatim version numbers).\n- DIVERSE facets \u2014 same noun-phrase cannot repeat across seeds with adjectives-only variation.\n- Do NOT invent vendor names you are uncertain exist.\n- For \\`site:<domain>\\` filters, ONLY use domains you are highly confident are real. Safe choices: \\`github.com\\`, \\`stackoverflow.com\\`, \\`reddit.com\\`, \\`news.ycombinator.com\\`, \\`arxiv.org\\`, \\`nvd.nist.gov\\`, \\`pypi.org\\`, \\`npmjs.com\\`, plus any canonical homepage/docs domain explicitly spelled out in the goal itself (e.g. goal names \"Cursor\" \u2192 \\`cursor.com\\`/\\`docs.cursor.com\\` is acceptable). If you don't know the product's real docs domain, leave the query open (no \\`site:\\`) instead of guessing.\n\nfreshness_window:\n- If the goal mentions a recent release / date / version, use \"days\" or \"weeks\".\n- Stable protocols / APIs \u2192 \"months\" or \"years\".`;\n\n try {\n const response = await requestTextWithFallback(\n processor,\n prompt,\n 'Research brief generation',\n signal,\n );\n\n if (!response.content) {\n mcpLog('warning', `Research brief generation returned no content: ${response.error ?? 'unknown'}`, 'llm');\n markLLMFailure('planner', response.error ?? 'empty response');\n return null;\n }\n\n const brief = parseResearchBrief(response.content);\n if (!brief) {\n mcpLog('warning', 'Research brief JSON parse or shape validation failed', 'llm');\n markLLMFailure('planner', 'brief parse/validation failed');\n return null;\n }\n\n markLLMSuccess('planner');\n return brief;\n } catch (err: unknown) {\n const message = err instanceof Error ? err.message : String(err);\n mcpLog('warning', `Research brief generation failed: ${message}`, 'llm');\n markLLMFailure('planner', message);\n return null;\n }\n}\n\nexport function renderResearchBrief(brief: ResearchBrief): string {\n const lines: string[] = [];\n\n lines.push('## Your research brief (goal-tailored)');\n lines.push('');\n lines.push(`**Goal class**: \\`${brief.goal_class}\\` \u2014 ${brief.goal_class_reason}`);\n lines.push(`**Primary branch**: \\`${brief.primary_branch}\\` \u2014 ${brief.primary_branch_reason}`);\n lines.push(`**Freshness**: \\`${brief.freshness_window}\\``);\n lines.push('');\n\n if (brief.first_call_sequence.length > 0) {\n lines.push('### First-call sequence');\n brief.first_call_sequence.forEach((step, i) => {\n lines.push(`${i + 1}. \\`${step.tool}\\` \u2014 ${step.reason}`);\n });\n lines.push('');\n }\n\n if (brief.keyword_seeds.length > 0) {\n lines.push(`### Keyword seeds (${brief.keyword_seeds.length}) \u2014 fire these in your first \\`web-search\\` call as a flat \\`queries\\` array`);\n for (const seed of brief.keyword_seeds) {\n lines.push(`- ${seed}`);\n }\n lines.push('');\n }\n\n if (brief.iteration_hints.length > 0) {\n lines.push('### Iteration hints (harvest new terms from scrape extracts\\' `## Follow-up signals`)');\n for (const hint of brief.iteration_hints) lines.push(`- ${hint}`);\n lines.push('');\n }\n\n if (brief.gaps_to_watch.length > 0) {\n lines.push('### Gaps to watch');\n for (const gap of brief.gaps_to_watch) lines.push(`- ${gap}`);\n lines.push('');\n }\n\n if (brief.stop_criteria.length > 0) {\n lines.push('### Stop criteria');\n for (const c of brief.stop_criteria) lines.push(`- ${c}`);\n lines.push('');\n }\n\n lines.push('---');\n lines.push('');\n lines.push('Fire `first_call_sequence` now. After each `scrape-links`, harvest new terms from `## Follow-up signals` and build your next `web-search` round. Stop when every gap is closed.');\n\n return lines.join('\\n');\n}\n"],
|
|
5
|
+
"mappings": "AAQA,OAAO,YAAY;AACnB,SAAS,gBAAgB,uBAAuB;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,cAAc;AAGvB,MAAM,sBAAsB;AAO5B,MAAM,gCAAgC;AAGtC,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB;AAG9B,MAAM,uBAAuB;AAG7B,MAAM,0BAA0B;AAuBhC,MAAM,YAAY;AAAA,EAChB,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,sBAAsB;AAAA,EACtB,wBAAwB;AAAA,EACxB,kBAAkB;AAAA,EAClB,oBAAoB;AAAA,EACpB,4BAA4B;AAAA,EAC5B,8BAA8B;AAChC;AAEO,SAAS,eAAe,MAA2B;AACxD,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,6BAA6B;AAAA,EACzC,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,+BAA+B;AAAA,EAC3C;AACF;AAEO,SAAS,eAAe,MAAqB,KAAoB;AACtE,QAAM,MAAK,oBAAI,KAAK,GAAE,YAAY;AAClC,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,OAAO,eAAe;AAClF,MAAI,SAAS,WAAW;AACtB,cAAU,gBAAgB;AAC1B,cAAU,uBAAuB;AACjC,cAAU,mBAAmB;AAC7B,cAAU,8BAA8B;AAAA,EAC1C,OAAO;AACL,cAAU,kBAAkB;AAC5B,cAAU,yBAAyB;AACnC,cAAU,qBAAqB;AAC/B,cAAU,gCAAgC;AAAA,EAC5C;AACF;AAEO,SAAS,eAAkC;AAChD,QAAM,MAAM,gBAAgB;AAC5B,SAAO;AAAA,IACL,eAAe,UAAU;AAAA,IACzB,iBAAiB,UAAU;AAAA,IAC3B,sBAAsB,UAAU;AAAA,IAChC,wBAAwB,UAAU;AAAA,IAClC,kBAAkB,UAAU;AAAA,IAC5B,oBAAoB,UAAU;AAAA;AAAA;AAAA,IAG9B,mBAAmB,IAAI;AAAA,IACvB,qBAAqB,IAAI;AAAA,IACzB,4BAA4B,UAAU;AAAA,IACtC,8BAA8B,UAAU;AAAA,EAC1C;AACF;AAGO,SAAS,0BAAgC;AAC9C,YAAU,gBAAgB;AAC1B,YAAU,kBAAkB;AAC5B,YAAU,uBAAuB;AACjC,YAAU,yBAAyB;AACnC,YAAU,mBAAmB;AAC7B,YAAU,qBAAqB;AAC/B,YAAU,6BAA6B;AACvC,YAAU,+BAA+B;AAC3C;AAgBA,MAAM,mBAAmB;AAAA,EACvB,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,YAAY;AACd;AAGA,MAAM,uBAAuB;AAG7B,MAAM,4BAA4B,oBAAI,IAAI;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAGD,SAAS,UAAU,OAA6C;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,YAAY,SACZ,OAAQ,MAAkC,WAAW;AAEzD;AAEA,IAAI,YAA2B;AAIxB,SAAS,qBAAoC;AAClD,MAAI,CAAC,gBAAgB,EAAE,cAAe,QAAO;AAE7C,MAAI,CAAC,WAAW;AACd,gBAAY,IAAI,OAAO;AAAA,MACrB,SAAS,eAAe;AAAA,MACxB,QAAQ,eAAe;AAAA,MACvB,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,gBAAgB,EAAE,WAAW,yBAAyB;AAAA,IACxD,CAAC;AACD,WAAO,QAAQ,qCAAqC,eAAe,KAAK,cAAc,eAAe,QAAQ,KAAK,KAAK;AAAA,EACzH;AACA,SAAO;AACT;AAEA,SAAS,qBAAqB,OAAe,QAAyC;AACpF,SAAO;AAAA,IACL;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,IAC5C,kBAAkB;AAAA,EACpB;AACF;AAEA,eAAsB,YACpB,WACA,QACA,gBACA,QACA,eACoE;AACpE,QAAM,QAAQ,iBAAiB,eAAe;AAE9C,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB,CAAC,gBAAgB,UAAU,KAAK,YAAY;AAAA,QAC1C,qBAAqB,OAAO,MAAM;AAAA,QAClC;AAAA,UACE,QAAQ,SAAS,YAAY,IAAI,CAAC,aAAa,MAAM,CAAC,IAAI;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG,cAAc,KAAK,KAAK;AAAA,IAC7B;AAEA,UAAM,UAAU,SAAS,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK;AAC9D,QAAI,SAAS;AACX,aAAO,EAAE,SAAS,MAAM;AAAA,IAC1B;AAEA,UAAM,MAAM,6BAA6B,KAAK;AAC9C,WAAO,WAAW,GAAG,cAAc,qCAAqC,KAAK,IAAI,KAAK;AACtF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,IAAI;AAAA,EAC5C,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,GAAG,cAAc,qBAAqB,KAAK,KAAK,OAAO,IAAI,KAAK;AAClF,WAAO,EAAE,SAAS,MAAM,OAAO,OAAO,QAAQ;AAAA,EAChD;AACF;AAQA,eAAsB,wBACpB,WACA,QACA,gBACA,QACoE;AACpE,QAAM,UAAU,MAAM,YAAY,WAAW,QAAQ,gBAAgB,MAAM;AAC3E,MAAI,QAAQ,QAAS,QAAO;AAE5B,QAAM,gBAAgB,eAAe;AACrC,MAAI,CAAC,cAAe,QAAO;AAE3B,SAAO,WAAW,+CAA+C,aAAa,IAAI,KAAK;AAEvF,MAAI,YAAY,QAAQ;AACxB,WAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,QAAI,UAAU,GAAG;AACf,YAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,aAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,UAAI;AAAE,cAAM,MAAM,SAAS,MAAM;AAAA,MAAG,QAAQ;AAAE;AAAA,MAAO;AAAA,IACvD;AACA,UAAM,SAAS,MAAM,YAAY,WAAW,QAAQ,GAAG,cAAc,eAAe,QAAQ,aAAa;AACzG,QAAI,OAAO,QAAS,QAAO;AAC3B,gBAAY,OAAO;AAAA,EACrB;AAEA,SAAO,EAAE,SAAS,MAAM,OAAO,eAAe,OAAO,UAAU;AACjE;AAKA,SAAS,oBAAoB,OAAyB;AACpD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAGhD,QAAM,YAAa,OAA6B;AAChD,MAAI,cAAc,cAAc,cAAc,aAAa;AACzD,WAAO;AAAA,EACT;AAGA,MAAI,UAAU,KAAK,GAAG;AACpB,QAAI,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,OAAO,MAAM,WAAW,KAAK;AACxH,aAAO;AAAA,IACT;AAAA,EACF;AAGA,QAAM,SAAS;AACf,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AACN,QAAM,YACJ,SACC,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO,YAC1D,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,MAAI,aAAa,0BAA0B,IAAI,SAAS,GAAG;AACzD,WAAO;AAAA,EACT;AAGA,QAAM,UAAU,OAAO,OAAO,YAAY,WAAW,OAAO,QAAQ,YAAY,IAAI;AACpF,MACE,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,SAAS,KAC1B,QAAQ,SAAS,WAAW,KAC5B,QAAQ,SAAS,qBAAqB,KACtC,QAAQ,SAAS,cAAc,KAC/B,QAAQ,SAAS,YAAY,KAC7B,QAAQ,SAAS,YAAY,GAC7B;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAOA,SAAS,qBAAqB,OAAyB;AACrD,MAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAEhD,QAAM,SAAS;AACf,QAAM,SACJ,OAAO,OAAO,UAAU,YAAY,OAAO,UAAU,OAChD,OAAO,QACR;AAEN,QAAM,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7D,QAAM,aAAa,UAAU,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AAC7E,MAAI,SAAS,6BAA6B,eAAe,2BAA2B;AAClF,WAAO;AAAA,EACT;AAEA,QAAM,WAAqB,CAAC;AAC5B,MAAI,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AACpE,MAAI,UAAU,OAAO,OAAO,YAAY,SAAU,UAAS,KAAK,OAAO,OAAO;AAC9E,QAAM,WAAW,SAAS,KAAK,GAAG,EAAE,YAAY;AAChD,SACE,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,gBAAgB,KAClC,SAAS,SAAS,aAAa,KAC/B,SAAS,SAAS,iBAAiB,KACnC,SAAS,SAAS,oBAAoB,KACtC,SAAS,SAAS,mBAAmB;AAEzC;AAKA,SAAS,oBAAoB,SAAyB;AACpD,QAAM,mBAAmB,iBAAiB,cAAc,KAAK,IAAI,GAAG,OAAO;AAC3E,QAAM,SAAS,KAAK,OAAO,IAAI,wBAAwB;AACvD,SAAO,KAAK,IAAI,mBAAmB,QAAQ,iBAAiB,UAAU;AACxE;AAOA,eAAsB,sBACpB,SACA,QACA,WACA,QACoB;AAEpB,MAAI,CAAC,OAAO,SAAS;AACnB,WAAO,EAAE,SAAS,WAAW,MAAM;AAAA,EACrC;AAEA,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,MACL;AAAA,MACA,WAAW;AAAA,MACX,OAAO;AAAA,MACP,cAAc;AAAA,QACZ,MAAM,UAAU;AAAA,QAChB,SAAS;AAAA,QACT,WAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AAEA,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO,EAAE,SAAS,WAAW,IAAI,WAAW,OAAO,OAAO,yBAAyB;AAAA,EACrF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,sBACtC,QAAQ,UAAU,GAAG,mBAAmB,IAAI,0CAC5C;AAKJ,QAAM,qBACJ,iBAAiB,SAAS,iCAAiC,CAAC,CAAC,eAAe;AAK9E,QAAM,WAAW,MAAM;AACrB,QAAI,CAAC,OAAO,IAAK,QAAO;AACxB,QAAI;AACF,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,aAAO,GAAG,EAAE,MAAM,GAAG,EAAE,QAAQ;AAAA,IACjC,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF,GAAG;AACH,QAAM,UAAU,UAAU,aAAa,OAAO;AAAA;AAAA,IAAS;AAEvD,QAAM,SAAS,OAAO,UAClB;AAAA;AAAA,EAEJ,OAAO,2BAA2B,OAAO,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAkDhD,gBAAgB,KACZ;AAAA;AAAA,EAEJ,OAAO;AAAA,EACP,gBAAgB;AAEhB,MAAI;AAIJ,MAAI,oBAAoB;AACtB;AAAA,MACE;AAAA,MACA,SAAS,iBAAiB,MAAM,qCAAqC,6BAA6B;AAAA,MAClG;AAAA,IACF;AAAA,EACF,OAAO;AACL,aAAS,UAAU,GAAG,WAAW,iBAAiB,YAAY,WAAW;AACvE,UAAI;AACF,YAAI,YAAY,GAAG;AACjB,iBAAO,QAAQ,4BAA4B,eAAe,KAAK,IAAI,KAAK;AAAA,QAC1E,OAAO;AACL,iBAAO,WAAW,iBAAiB,OAAO,IAAI,iBAAiB,UAAU,IAAI,KAAK;AAAA,QACpF;AAEA,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,kBAAkB,MAAM;AAE9E,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,0BAA0B,SAAS,QAAQ,MAAM,eAAe,KAAK;AACpF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,QACtD;AAGA,eAAO,WAAW,oCAAoC,KAAK;AAC3D,uBAAe,aAAa,6BAA6B;AACzD,eAAO;AAAA,UACL;AAAA,UACA,WAAW;AAAA,UACX,OAAO;AAAA,UACP,cAAc;AAAA,YACZ,MAAM,UAAU;AAAA,YAChB,SAAS;AAAA,YACT,WAAW;AAAA,UACb;AAAA,QACF;AAAA,MAEF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,cAAM,SAAS,UAAU,GAAG,IAAI,IAAI,SAAS;AAC7C,cAAM,OAAO,OAAO,QAAQ,YAAY,QAAQ,QAAQ,UAAU,MAC9D,OAAQ,IAAgC,IAAI,IAC5C;AACJ,cAAM,SAAS,qBAAqB,GAAG;AACvC,eAAO,SAAS,kBAAkB,UAAU,CAAC,MAAM,UAAU,OAAO,YAAY,MAAM,UAAU,IAAI,eAAe,oBAAoB,GAAG,CAAC,oBAAoB,MAAM,KAAK,KAAK;AAG/K,YAAI,QAAQ;AACV,iBAAO,WAAW,6FAAwF,KAAK;AAC/G;AAAA,QACF;AAEA,YAAI,oBAAoB,GAAG,KAAK,UAAU,iBAAiB,YAAY;AACrE,gBAAM,UAAU,oBAAoB,OAAO;AAC3C,iBAAO,WAAW,eAAe,OAAO,SAAS,KAAK;AACtD,cAAI;AAAE,kBAAM,MAAM,SAAS,MAAM;AAAA,UAAG,QAAQ;AAAE;AAAA,UAAO;AACrD;AAAA,QACF;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,gBAAgB,eAAe;AACrC,MAAI,eAAe;AACjB,WAAO,WAAW,4CAA4C,aAAa,IAAI,KAAK;AACpF,aAAS,UAAU,GAAG,UAAU,sBAAsB,WAAW;AAC/D,UAAI,UAAU,GAAG;AACf,cAAM,UAAU,oBAAoB,UAAU,CAAC;AAC/C,eAAO,WAAW,kBAAkB,OAAO,IAAI,uBAAuB,CAAC,OAAO,OAAO,MAAM,KAAK;AAChG,YAAI;AAAE,gBAAM,MAAM,SAAS,MAAM;AAAA,QAAG,QAAQ;AAAE;AAAA,QAAO;AAAA,MACvD;AACA,UAAI;AACF,cAAM,WAAW,MAAM,YAAY,WAAW,QAAQ,6BAA6B,QAAQ,aAAa;AACxG,YAAI,SAAS,SAAS;AACpB,iBAAO,QAAQ,sBAAsB,SAAS,QAAQ,MAAM,eAAe,KAAK;AAChF,yBAAe,WAAW;AAC1B,iBAAO,EAAE,SAAS,SAAS,SAAS,WAAW,KAAK;AAAA,QACtD;AACA,eAAO,WAAW,oCAAoC,KAAK;AAC3D;AAAA,MACF,SAAS,KAAc;AACrB,oBAAY,cAAc,GAAG;AAC7B,eAAO,SAAS,2BAA2B,UAAU,CAAC,MAAM,UAAU,OAAO,IAAI,KAAK;AAAA,MACxF;AAAA,IACF;AAAA,EACF;AAEA,QAAM,eAAe,WAAW,WAAW;AAC3C,SAAO,SAAS,wBAAwB,YAAY,iCAAiC,KAAK;AAC1F,iBAAe,aAAa,YAAY;AAExC,SAAO;AAAA,IACL;AAAA,IACA,WAAW;AAAA,IACX,OAAO,0BAA0B,YAAY;AAAA,IAC7C,cAAc,aAAa;AAAA,MACzB,MAAM,UAAU;AAAA,MAChB,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAOA,MAAM,0BAA0B;AA2ChC,eAAsB,sBACpB,YAQA,WACA,cACA,WACA,kBAAqC,CAAC,GAC4B;AAClE,QAAM,iBAAiB,WAAW,MAAM,GAAG,uBAAuB;AAMlE,QAAM,iBAAiB,CAAC,IAAI,IAAI,IAAI,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,CAAC;AACxD,QAAM,gBAAgB,CAAC,SAAyB,eAAe,OAAO,CAAC,KAAK;AAG5E,QAAM,QAAkB,CAAC;AACzB,aAAW,OAAO,gBAAgB;AAChC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,UAAM,UAAU,IAAI,QAAQ,SAAS,MACjC,IAAI,QAAQ,MAAM,GAAG,GAAG,IAAI,QAC5B,IAAI;AACR,UAAM,KAAK,IAAI,IAAI,IAAI,OAAO,cAAc,IAAI,IAAI,CAAC,IAAI,IAAI,KAAK,WAAM,MAAM,WAAM,OAAO,EAAE;AAAA,EAC/F;AAEA,QAAM,mBAAmB,gBAAgB,SAAS,IAC9C,gBAAgB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IAC9C;AACJ,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA,aAEJ,SAAS;AAAA,SACb,KAAK;AAAA;AAAA;AAAA,EAGZ,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iBAmDD,eAAe,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAMpB,eAAe,MAAM,cAAc,YAAY;AAAA,EAC/D,MAAM,KAAK,IAAI,CAAC;AAEhB,MAAI;AACF,WAAO,QAAQ,eAAe,eAAe,MAAM,2BAA2B,KAAK;AAEnF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAGA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAKjC,QAAI,CAAC,OAAO,SAAS,OAAO,OAAO,cAAc,YAAY,CAAC,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC3F,YAAM,SAAS;AACf,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,MAAM,OAAO,OAAO;AAAA,IACvC;AAEA,WAAO,QAAQ,4BAA4B,OAAO,QAAQ,OAAO,OAAK,EAAE,SAAS,iBAAiB,EAAE,MAAM,oBAAoB,KAAK;AACnI,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,OAAO;AAAA,EAC1B,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,0BAA0B,OAAO,IAAI,KAAK;AAC1D,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,MAAM,OAAO,0BAA0B,OAAO,GAAG;AAAA,EACpE;AACF;AAEA,eAAsB,+BACpB,YAKA,WACA,iBACA,WAC8D;AAC9D,QAAM,kBAAkB,WAAW,MAAM,GAAG,EAAE;AAC9C,QAAM,QAAQ,gBAAgB,IAAI,CAAC,QAAQ;AACzC,QAAI;AACJ,QAAI;AACF,eAAS,IAAI,IAAI,IAAI,GAAG,EAAE,SAAS,QAAQ,UAAU,EAAE;AAAA,IACzD,QAAQ;AACN,eAAS,IAAI;AAAA,IACf;AACA,WAAO,IAAI,IAAI,IAAI,KAAK,IAAI,KAAK,WAAM,MAAM;AAAA,EAC/C,CAAC;AAED,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aASJ,SAAS;AAAA;AAAA;AAAA,EAGpB,gBAAgB,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA,EAGvD,MAAM,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAShB,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,YAAM,SAAS,SAAS,SAAS;AACjC,qBAAe,WAAW,MAAM;AAChC,aAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,OAAO;AAAA,IACrC;AAEA,UAAM,UAAU,SAAS,QAAQ,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACrG,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,mBAAe,SAAS;AACxB,WAAO,EAAE,QAAQ,MAAM,QAAQ,OAAO,cAAc,IAAI,OAAO,iBAAiB,CAAC,EAAE;AAAA,EACrF,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,SAAS,4CAA4C,OAAO,IAAI,KAAK;AAC5E,mBAAe,WAAW,OAAO;AACjC,WAAO,EAAE,QAAQ,CAAC,GAAG,OAAO,QAAQ;AAAA,EACtC;AACF;AA0BA,MAAM,qBAAqB,oBAAI,IAAI;AAAA,EACjC;AAAA,EAAQ;AAAA,EAAO;AAAA,EAAa;AAAA,EAAa;AAAA,EAAW;AAAA,EACpD;AAAA,EAAa;AAAA,EAAkB;AACjC,CAAC;AAED,MAAM,kBAAkB,oBAAI,IAAI,CAAC,QAAQ,SAAS,UAAU,OAAO,CAAC;AACpE,MAAM,iBAAiB,oBAAI,IAAmB,CAAC,UAAU,OAAO,MAAM,CAAC;AACvE,MAAM,mBAAmB,oBAAI,IAAI,CAAC,cAAc,cAAc,CAAC;AAE/D,SAAS,cAAc,OAAmC;AACxD,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM,OAAO,MAAM,QAAQ;AACzE;AAEA,SAAS,YAAY,OAA8C;AACjE,SAAO,MAAM,QAAQ,KAAK,KAAK,MAAM,MAAM,CAAC,MAAM;AAChD,QAAI,OAAO,MAAM,YAAY,MAAM,KAAM,QAAO;AAChD,UAAM,OAAQ,EAA8B;AAC5C,UAAM,SAAU,EAA8B;AAC9C,WAAO,OAAO,SAAS,YAClB,iBAAiB,IAAI,IAAI,KACzB,OAAO,WAAW,YAClB,OAAO,KAAK,EAAE,SAAS;AAAA,EAC9B,CAAC;AACH;AAEO,SAAS,mBAAmB,KAAmC;AACpE,MAAI;AACF,UAAM,UAAU,IAAI,QAAQ,wBAAwB,EAAE,EAAE,QAAQ,eAAe,EAAE,EAAE,KAAK;AACxF,UAAM,SAAS,KAAK,MAAM,OAAO;AAEjC,UAAM,aAAa,OAAO,OAAO,eAAe,WAAW,OAAO,aAAa;AAC/E,QAAI,CAAC,cAAc,CAAC,mBAAmB,IAAI,UAAU,EAAG,QAAO;AAE/D,UAAM,mBAAmB,OAAO,OAAO,qBAAqB,WAAW,OAAO,mBAAmB;AACjG,QAAI,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,gBAAgB,EAAG,QAAO;AAExE,UAAM,iBAAiB,OAAO;AAC9B,QAAI,OAAO,mBAAmB,YAAY,CAAC,eAAe,IAAI,cAA+B,EAAG,QAAO;AAEvG,QAAI,CAAC,YAAY,OAAO,mBAAmB,KAAK,OAAO,oBAAoB,WAAW,EAAG,QAAO;AAChG,QAAI,CAAC,cAAc,OAAO,aAAa,KAAK,OAAO,cAAc,WAAW,EAAG,QAAO;AAEtF,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,OAAO,OAAO,sBAAsB,WAAW,OAAO,oBAAoB;AAAA,MAC7F;AAAA,MACA,uBAAuB,OAAO,OAAO,0BAA0B,WAAW,OAAO,wBAAwB;AAAA,MACzG;AAAA,MACA,qBAAqB,OAAO;AAAA,MAC5B,eAAe,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC;AAAA,MACrE,iBAAiB,cAAc,OAAO,eAAe,IAAI,OAAO,kBAAkB,CAAC;AAAA,MACnF,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,MAC7E,eAAe,cAAc,OAAO,aAAa,IAAI,OAAO,gBAAgB,CAAC;AAAA,IAC/E;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,sBACpB,MACA,WACA,QAC+B;AAC/B,QAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAElD,QAAM,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAOT,IAAI;AAAA,SACH,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA2CZ,MAAI;AACF,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI,CAAC,SAAS,SAAS;AACrB,aAAO,WAAW,kDAAkD,SAAS,SAAS,SAAS,IAAI,KAAK;AACxG,qBAAe,WAAW,SAAS,SAAS,gBAAgB;AAC5D,aAAO;AAAA,IACT;AAEA,UAAM,QAAQ,mBAAmB,SAAS,OAAO;AACjD,QAAI,CAAC,OAAO;AACV,aAAO,WAAW,wDAAwD,KAAK;AAC/E,qBAAe,WAAW,+BAA+B;AACzD,aAAO;AAAA,IACT;AAEA,mBAAe,SAAS;AACxB,WAAO;AAAA,EACT,SAAS,KAAc;AACrB,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,WAAO,WAAW,qCAAqC,OAAO,IAAI,KAAK;AACvE,mBAAe,WAAW,OAAO;AACjC,WAAO;AAAA,EACT;AACF;AAEO,SAAS,oBAAoB,OAA8B;AAChE,QAAM,QAAkB,CAAC;AAEzB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,qBAAqB,MAAM,UAAU,aAAQ,MAAM,iBAAiB,EAAE;AACjF,QAAM,KAAK,yBAAyB,MAAM,cAAc,aAAQ,MAAM,qBAAqB,EAAE;AAC7F,QAAM,KAAK,oBAAoB,MAAM,gBAAgB,IAAI;AACzD,QAAM,KAAK,EAAE;AAEb,MAAI,MAAM,oBAAoB,SAAS,GAAG;AACxC,UAAM,KAAK,yBAAyB;AACpC,UAAM,oBAAoB,QAAQ,CAAC,MAAM,MAAM;AAC7C,YAAM,KAAK,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,aAAQ,KAAK,MAAM,EAAE;AAAA,IAC1D,CAAC;AACD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,sBAAsB,MAAM,cAAc,MAAM,mFAA8E;AACzI,eAAW,QAAQ,MAAM,eAAe;AACtC,YAAM,KAAK,KAAK,IAAI,EAAE;AAAA,IACxB;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,gBAAgB,SAAS,GAAG;AACpC,UAAM,KAAK,sFAAuF;AAClG,eAAW,QAAQ,MAAM,gBAAiB,OAAM,KAAK,KAAK,IAAI,EAAE;AAChE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,OAAO,MAAM,cAAe,OAAM,KAAK,KAAK,GAAG,EAAE;AAC5D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,MAAM,cAAc,SAAS,GAAG;AAClC,UAAM,KAAK,mBAAmB;AAC9B,eAAW,KAAK,MAAM,cAAe,OAAM,KAAK,KAAK,CAAC,EAAE;AACxD,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iLAAiL;AAE5L,SAAO,MAAM,KAAK,IAAI;AACxB;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/src/tools/scrape.js
CHANGED
|
@@ -161,7 +161,7 @@ async function fetchWebBranch(inputs, client) {
|
|
|
161
161
|
} catch {
|
|
162
162
|
content = result.content;
|
|
163
163
|
}
|
|
164
|
-
successItems.push({ url: result.url, content, index: origIndex });
|
|
164
|
+
successItems.push({ url: result.url, content, index: origIndex, rawContent: content });
|
|
165
165
|
}
|
|
166
166
|
return {
|
|
167
167
|
successItems,
|
|
@@ -221,7 +221,7 @@ async function fetchDocumentBranch(inputs, jinaClient, scrapeErrorContext) {
|
|
|
221
221
|
continue;
|
|
222
222
|
}
|
|
223
223
|
successful++;
|
|
224
|
-
successItems.push({ url: input.url, content: result.content, index: input.origIndex });
|
|
224
|
+
successItems.push({ url: input.url, content: result.content, index: input.origIndex, rawContent: result.content });
|
|
225
225
|
}
|
|
226
226
|
return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };
|
|
227
227
|
}
|
|
@@ -311,12 +311,37 @@ async function fetchRedditBranch(inputs) {
|
|
|
311
311
|
continue;
|
|
312
312
|
}
|
|
313
313
|
successful++;
|
|
314
|
-
|
|
314
|
+
const md = formatRedditPostAsMarkdown(result);
|
|
315
|
+
successItems.push({ url, content: md, index: origIndex, rawContent: md });
|
|
315
316
|
}
|
|
316
317
|
return { successItems, failedContents, metrics: { successful, failed, totalCredits: 0 } };
|
|
317
318
|
}
|
|
319
|
+
const TERSE_LLM_FAILURE_RE = /^\s*##\s*Matches\s*\n+\s*_Page did not load:\s*([a-z0-9_-]+)_\s*\.?\s*$/i;
|
|
320
|
+
const RAW_FALLBACK_CHAR_CAP = 4e3;
|
|
321
|
+
function detectTerseFailure(llmOutput) {
|
|
322
|
+
const m = llmOutput.trim().match(TERSE_LLM_FAILURE_RE);
|
|
323
|
+
return m ? m[1] : null;
|
|
324
|
+
}
|
|
325
|
+
function mergeLlmWithRawFallback(llmOutput, rawContent) {
|
|
326
|
+
const reason = detectTerseFailure(llmOutput);
|
|
327
|
+
if (!reason) return llmOutput;
|
|
328
|
+
const trimmed = rawContent?.trim();
|
|
329
|
+
if (!trimmed) return llmOutput;
|
|
330
|
+
const snippet = trimmed.length > RAW_FALLBACK_CHAR_CAP ? trimmed.slice(0, RAW_FALLBACK_CHAR_CAP) + "\n\n\u2026[raw truncated]" : trimmed;
|
|
331
|
+
return `${llmOutput.trim()}
|
|
332
|
+
|
|
333
|
+
## Raw content (LLM flagged page as ${reason})
|
|
334
|
+
|
|
335
|
+
${snippet}`;
|
|
336
|
+
}
|
|
318
337
|
async function processItemsWithLlm(successItems, enhancedInstruction, llmProcessor, reporter) {
|
|
319
338
|
let llmErrors = 0;
|
|
339
|
+
if (!enhancedInstruction) {
|
|
340
|
+
if (successItems.length > 0) {
|
|
341
|
+
mcpLog("info", "Raw mode: extract omitted \u2014 returning cleaned scraped content without LLM pass", "scrape");
|
|
342
|
+
}
|
|
343
|
+
return { items: successItems, llmErrors, llmAttempted: 0 };
|
|
344
|
+
}
|
|
320
345
|
if (!llmProcessor || successItems.length === 0) {
|
|
321
346
|
if (!llmProcessor && successItems.length > 0) {
|
|
322
347
|
mcpLog("warning", "LLM unavailable (LLM_API_KEY not set). Returning raw scraped content.", "scrape");
|
|
@@ -335,7 +360,12 @@ async function processItemsWithLlm(successItems, enhancedInstruction, llmProcess
|
|
|
335
360
|
llmProcessor
|
|
336
361
|
);
|
|
337
362
|
if (llmResult.processed) {
|
|
338
|
-
|
|
363
|
+
const merged = mergeLlmWithRawFallback(llmResult.content, item.rawContent);
|
|
364
|
+
if (merged !== llmResult.content) {
|
|
365
|
+
mcpLog("warning", `LLM emitted terse escape line for ${item.url} \u2014 preserved raw fallback`, "scrape");
|
|
366
|
+
void reporter.log("warning", `llm_terse_escape: ${item.url} \u2014 preserving raw fallback`);
|
|
367
|
+
}
|
|
368
|
+
return { ...item, content: merged };
|
|
339
369
|
}
|
|
340
370
|
llmErrors++;
|
|
341
371
|
mcpLog("warning", `LLM extraction failed for ${item.url}: ${llmResult.error || "unknown reason"}`, "scrape");
|
|
@@ -455,7 +485,7 @@ async function handleScrapeLinks(params, reporter = NOOP_REPORTER) {
|
|
|
455
485
|
]
|
|
456
486
|
);
|
|
457
487
|
}
|
|
458
|
-
const enhancedInstruction = enhanceExtractionInstruction(params.extract);
|
|
488
|
+
const enhancedInstruction = params.extract ? enhanceExtractionInstruction(params.extract) : void 0;
|
|
459
489
|
await reporter.progress(35, 100, "Fetching page content");
|
|
460
490
|
const emptyPhase = {
|
|
461
491
|
successItems: [],
|
|
@@ -548,7 +578,7 @@ function registerScrapeLinksTool(server) {
|
|
|
548
578
|
{
|
|
549
579
|
name: "scrape-links",
|
|
550
580
|
title: "Scrape Links",
|
|
551
|
-
description: "Fetch many URLs in parallel
|
|
581
|
+
description: "Fetch many URLs in parallel. With `extract` set, run per-URL structured LLM extraction (each page returns `## Source`, `## Matches` verbatim facts, `## Not found` gaps, `## Follow-up signals` new terms + referenced URLs); omit `extract` for raw mode (cleaned markdown per URL, no LLM pass). Auto-detects reddit.com post permalinks \u2192 Reddit API (threaded post + comments); PDF/DOCX/PPTX/XLSX \u2192 Jina Reader; everything else \u2192 HTTP scraper. Safe to call in parallel \u2014 group URLs by context rather than jamming unrelated batches together. Describe the SHAPE of what you want in `extract`, facets separated by `|` (e.g. `root cause | affected versions | fix | workarounds | timeline`).",
|
|
552
582
|
schema: scrapeLinksParamsSchema,
|
|
553
583
|
outputSchema: scrapeLinksOutputSchema,
|
|
554
584
|
annotations: {
|
|
@@ -570,8 +600,11 @@ function registerScrapeLinksTool(server) {
|
|
|
570
600
|
);
|
|
571
601
|
}
|
|
572
602
|
export {
|
|
603
|
+
RAW_FALLBACK_CHAR_CAP,
|
|
604
|
+
detectTerseFailure,
|
|
573
605
|
formatJinaFailure,
|
|
574
606
|
handleScrapeLinks,
|
|
607
|
+
mergeLlmWithRawFallback,
|
|
575
608
|
registerScrapeLinksTool
|
|
576
609
|
};
|
|
577
610
|
//# sourceMappingURL=scrape.js.map
|