@staticn0va/wigolo 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +142 -345
  2. package/dist/agent/pipeline.d.ts.map +1 -1
  3. package/dist/agent/pipeline.js +35 -5
  4. package/dist/agent/pipeline.js.map +1 -1
  5. package/dist/cache/store.d.ts +1 -0
  6. package/dist/cache/store.d.ts.map +1 -1
  7. package/dist/cache/store.js +4 -2
  8. package/dist/cache/store.js.map +1 -1
  9. package/dist/cli/doctor.d.ts.map +1 -1
  10. package/dist/cli/doctor.js +43 -17
  11. package/dist/cli/doctor.js.map +1 -1
  12. package/dist/cli/shutdown.d.ts +2 -0
  13. package/dist/cli/shutdown.d.ts.map +1 -0
  14. package/dist/cli/shutdown.js +26 -0
  15. package/dist/cli/shutdown.js.map +1 -0
  16. package/dist/extraction/v1/local-llm.d.ts.map +1 -1
  17. package/dist/extraction/v1/local-llm.js +13 -37
  18. package/dist/extraction/v1/local-llm.js.map +1 -1
  19. package/dist/fetch/error-describe.d.ts +7 -0
  20. package/dist/fetch/error-describe.d.ts.map +1 -0
  21. package/dist/fetch/error-describe.js +37 -0
  22. package/dist/fetch/error-describe.js.map +1 -0
  23. package/dist/fetch/router.d.ts.map +1 -1
  24. package/dist/fetch/router.js +4 -2
  25. package/dist/fetch/router.js.map +1 -1
  26. package/dist/index.js +17 -12
  27. package/dist/index.js.map +1 -1
  28. package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
  29. package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
  30. package/dist/integrations/cloud/llm/model-select.js +32 -0
  31. package/dist/integrations/cloud/llm/model-select.js.map +1 -0
  32. package/dist/integrations/cloud/llm/run.d.ts +27 -0
  33. package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
  34. package/dist/integrations/cloud/llm/run.js +99 -0
  35. package/dist/integrations/cloud/llm/run.js.map +1 -0
  36. package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
  37. package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
  38. package/dist/integrations/cloud/llm/text-adapters.js +103 -0
  39. package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
  40. package/dist/providers/rerank-provider.d.ts +1 -0
  41. package/dist/providers/rerank-provider.d.ts.map +1 -1
  42. package/dist/providers/rerank-provider.js +13 -0
  43. package/dist/providers/rerank-provider.js.map +1 -1
  44. package/dist/research/brief.d.ts +1 -0
  45. package/dist/research/brief.d.ts.map +1 -1
  46. package/dist/research/brief.js +8 -4
  47. package/dist/research/brief.js.map +1 -1
  48. package/dist/research/pipeline.js +1 -1
  49. package/dist/research/pipeline.js.map +1 -1
  50. package/dist/research/synthesis-local.d.ts +3 -0
  51. package/dist/research/synthesis-local.d.ts.map +1 -1
  52. package/dist/research/synthesis-local.js +18 -29
  53. package/dist/research/synthesis-local.js.map +1 -1
  54. package/dist/search/filters.d.ts.map +1 -1
  55. package/dist/search/filters.js +11 -1
  56. package/dist/search/filters.js.map +1 -1
  57. package/dist/search/reranker/transformers-rerank-provider.d.ts +1 -0
  58. package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -1
  59. package/dist/search/reranker/transformers-rerank-provider.js +16 -0
  60. package/dist/search/reranker/transformers-rerank-provider.js.map +1 -1
  61. package/dist/tools/cache.d.ts.map +1 -1
  62. package/dist/tools/cache.js +4 -2
  63. package/dist/tools/cache.js.map +1 -1
  64. package/dist/tools/fetch.d.ts.map +1 -1
  65. package/dist/tools/fetch.js +17 -4
  66. package/dist/tools/fetch.js.map +1 -1
  67. package/package.json +1 -1
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../../src/integrations/cloud/llm/text-adapters.ts"],"sourcesContent":["// Free-form text completion adapters per provider. Returns plain text\n// (markdown / prose) without a JSON schema constraint — used by research +\n// agent synthesis. The JSON-schema adapters in anthropic.ts/openai.ts/etc.\n// stay for extract's structured path.\n//\n// SDKs are imported lazily inside each adapter so module-load is cheap;\n// otherwise pulling in all four cloud SDKs at boot adds hundreds of ms to\n// MCP server startup (caught by the cold-start e2e timing test).\n\nimport type { LLMProvider } from './types.js';\n\nexport interface TextCallOpts {\n prompt: string;\n model: string;\n maxTokens?: number;\n signal?: AbortSignal;\n}\n\nexport interface TextCallResult {\n text: string;\n provider: LLMProvider;\n model: string;\n latencyMs: number;\n}\n\nconst DEFAULT_MAX_TOKENS = 2000;\n\nexport async function callAnthropicText(opts: TextCallOpts, apiKey: string): Promise<TextCallResult> {\n const { default: Anthropic } = await import('@anthropic-ai/sdk');\n const client = new Anthropic({ apiKey });\n const start = Date.now();\n const response = await client.messages.create(\n {\n model: opts.model,\n max_tokens: opts.maxTokens ?? DEFAULT_MAX_TOKENS,\n messages: [{ role: 'user', content: opts.prompt }],\n },\n { signal: opts.signal },\n );\n const block = (response.content ?? []).find((b: { type: string }) => b.type === 'text') as\n | { type: 'text'; text: string }\n | undefined;\n if (!block) throw new Error('anthropic: no text block in response');\n return {\n text: block.text,\n provider: 'anthropic',\n model: response.model ?? opts.model,\n latencyMs: Date.now() - start,\n };\n}\n\nexport async function callOpenAIText(opts: TextCallOpts, apiKey: string): Promise<TextCallResult> {\n const { default: OpenAI } = await import('openai');\n const client = new OpenAI({ apiKey });\n const start = Date.now();\n const response = await client.chat.completions.create(\n {\n model: opts.model,\n max_completion_tokens: opts.maxTokens ?? DEFAULT_MAX_TOKENS,\n messages: [{ role: 'user', content: opts.prompt }],\n },\n { signal: opts.signal },\n );\n const text = response.choices?.[0]?.message?.content;\n if (typeof text !== 'string' || text.trim().length === 0) {\n throw new Error('openai: empty content in response');\n }\n return {\n text,\n provider: 'openai',\n model: response.model ?? opts.model,\n latencyMs: Date.now() - start,\n };\n}\n\nexport async function callGeminiText(opts: TextCallOpts, apiKey: string): Promise<TextCallResult> {\n const { GoogleGenAI } = await import('@google/genai');\n const client = new GoogleGenAI({ apiKey });\n const start = Date.now();\n const response = await client.models.generateContent({\n model: opts.model,\n contents: opts.prompt,\n config: {\n maxOutputTokens: opts.maxTokens ?? DEFAULT_MAX_TOKENS,\n abortSignal: opts.signal,\n },\n });\n const text = response.text;\n if (!text || text.trim().length === 0) throw new Error('gemini: empty text in response');\n return {\n text,\n provider: 'gemini',\n model: opts.model,\n latencyMs: Date.now() - start,\n };\n}\n\nexport async function callGroqText(opts: TextCallOpts, apiKey: string): Promise<TextCallResult> {\n const { default: Groq } = await import('groq-sdk');\n const client = new Groq({ apiKey });\n const start = Date.now();\n const response = await client.chat.completions.create(\n {\n model: opts.model,\n max_completion_tokens: opts.maxTokens ?? DEFAULT_MAX_TOKENS,\n messages: [{ role: 'user', content: opts.prompt }],\n },\n { signal: opts.signal },\n );\n const text = response.choices?.[0]?.message?.content;\n if (typeof text !== 'string' || text.trim().length === 0) {\n throw new Error('groq: empty content in response');\n }\n return {\n text,\n provider: 'groq',\n model: response.model ?? opts.model,\n latencyMs: Date.now() - start,\n };\n}\n\nexport const TEXT_ADAPTERS: Record<\n LLMProvider,\n (opts: TextCallOpts, apiKey: string) => Promise<TextCallResult>\n> = {\n anthropic: callAnthropicText,\n openai: callOpenAIText,\n gemini: callGeminiText,\n groq: callGroqText,\n};\n"],"mappings":"AAyBA,MAAM,qBAAqB;AAE3B,eAAsB,kBAAkB,MAAoB,QAAyC;AACnG,QAAM,EAAE,SAAS,UAAU,IAAI,MAAM,OAAO,mBAAmB;AAC/D,QAAM,SAAS,IAAI,UAAU,EAAE,OAAO,CAAC;AACvC,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,WAAW,MAAM,OAAO,SAAS;AAAA,IACrC;AAAA,MACE,OAAO,KAAK;AAAA,MACZ,YAAY,KAAK,aAAa;AAAA,MAC9B,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,KAAK,OAAO,CAAC;AAAA,IACnD;AAAA,IACA,EAAE,QAAQ,KAAK,OAAO;AAAA,EACxB;AACA,QAAM,SAAS,SAAS,WAAW,CAAC,GAAG,KAAK,CAAC,MAAwB,EAAE,SAAS,MAAM;AAGtF,MAAI,CAAC,MAAO,OAAM,IAAI,MAAM,sCAAsC;AAClE,SAAO;AAAA,IACL,MAAM,MAAM;AAAA,IACZ,UAAU;AAAA,IACV,OAAO,SAAS,SAAS,KAAK;AAAA,IAC9B,WAAW,KAAK,IAAI,IAAI;AAAA,EAC1B;AACF;AAEA,eAAsB,eAAe,MAAoB,QAAyC;AAChG,QAAM,EAAE,SAAS,OAAO,IAAI,MAAM,OAAO,QAAQ;AACjD,QAAM,SAAS,IAAI,OAAO,EAAE,OAAO,CAAC;AACpC,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,WAAW,MAAM,OAAO,KAAK,YAAY;AAAA,IAC7C;AAAA,MACE,OAAO,KAAK;AAAA,MACZ,uBAAuB,KAAK,aAAa;AAAA,MACzC,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,KAAK,OAAO,CAAC;AAAA,IACnD;AAAA,IACA,EAAE,QAAQ,KAAK,OAAO;AAAA,EACxB;AACA,QAAM,OAAO,SAAS,UAAU,CAAC,GAAG,SAAS;AAC7C,MAAI,OAAO,SAAS,YAAY,KAAK,KAAK,EAAE,WAAW,GAAG;AACxD,UAAM,IAAI,MAAM,mCAAmC;AAAA,EACrD;AACA,SAAO;AAAA,IACL;AAAA,IACA,UAAU;AAAA,IACV,OAAO,SAAS,SAAS,KAAK;AAAA,IAC9B,WAAW,KAAK,IAAI,IAAI;AAAA,EAC1B;AACF;AAEA,eAAsB,eAAe,MAAoB,QAAyC;AAChG,QAAM,EAAE,YAAY,IAAI,MAAM,OAAO,eAAe;AACpD,QAAM,SAAS,IAAI,YAAY,EAAE,OAAO,CAAC;AACzC,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,WAAW,MAAM,OAAO,OAAO,gBAAgB;AAAA,IACnD,OAAO,KAAK;AAAA,IACZ,UAAU,KAAK;AAAA,IACf,QAAQ;AAAA,MACN,iBAAiB,KAAK,aAAa;AAAA,MACnC,aAAa,KAAK;AAAA,IACpB;AAAA,EACF,CAAC;AACD,QAAM,OAAO,SAAS;AACtB,MAAI,CAAC,QAAQ,KAAK,KAAK,EAAE,WAAW,EAAG,OAAM,IAAI,MAAM,gCAAgC;AACvF,SAAO;AAAA,IACL;AAAA,IACA,UAAU;AAAA,IACV,OAAO,KAAK;AAAA,IACZ,WAAW,KAAK,IAAI,IAAI;AAAA,EAC1B;AACF;AAEA,eAAsB,aAAa,MAAoB,QAAyC;AAC9F,QAAM,EAAE,SAAS,KAAK,IAAI,MAAM,OAAO,UAAU;AACjD,QAAM,SAAS,IAAI,KAAK,EAAE,OAAO,CAAC;AAClC,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,WAAW,MAAM,OAAO,KAAK,YAAY;AAAA,IAC7C;AAAA,MACE,OAAO,KAAK;AAAA,MACZ,uBAAuB,KAAK,aAAa;AAAA,MACzC,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,KAAK,OAAO,CAAC;AAAA,IACnD;AAAA,IACA,EAAE,QAAQ,KAAK,OAAO;AAAA,EACxB;AACA,QAAM,OAAO,SAAS,UAAU,CAAC,GAAG,SAAS;AAC7C,MAAI,OAAO,SAAS,YAAY,KAAK,KAAK,EAAE,WAAW,GAAG;AACxD,UAAM,IAAI,MAAM,iCAAiC;AAAA,EACnD;AACA,SAAO;AAAA,IACL;AAAA,IACA,UAAU;AAAA,IACV,OAAO,SAAS,SAAS,KAAK;AAAA,IAC9B,WAAW,KAAK,IAAI,IAAI;AAAA,EAC1B;AACF;AAEO,MAAM,gBAGT;AAAA,EACF,WAAW;AAAA,EACX,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,MAAM;AACR;","names":[]}
@@ -13,4 +13,5 @@ export interface RerankProvider {
13
13
  }
14
14
  export declare function getRerankProvider(): Promise<RerankProvider>;
15
15
  export declare function _resetRerankProviderForTest(): void;
16
+ export declare function disposeRerankProvider(): Promise<void>;
16
17
  //# sourceMappingURL=rerank-provider.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"rerank-provider.d.ts","sourceRoot":"","sources":["../../src/providers/rerank-provider.ts"],"names":[],"mappings":"AAWA,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,CACJ,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,eAAe,EAAE,EAC7B,IAAI,CAAC,EAAE,MAAM,GACZ,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC3B,8DAA8D;IAC9D,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAID,wBAAgB,iBAAiB,IAAI,OAAO,CAAC,cAAc,CAAC,CAkB3D;AAED,wBAAgB,2BAA2B,IAAI,IAAI,CAElD"}
1
+ {"version":3,"file":"rerank-provider.d.ts","sourceRoot":"","sources":["../../src/providers/rerank-provider.ts"],"names":[],"mappings":"AAWA,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,CACJ,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,eAAe,EAAE,EAC7B,IAAI,CAAC,EAAE,MAAM,GACZ,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC3B,8DAA8D;IAC9D,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAID,wBAAgB,iBAAiB,IAAI,OAAO,CAAC,cAAc,CAAC,CAkB3D;AAED,wBAAgB,2BAA2B,IAAI,IAAI,CAElD;AAID,wBAAsB,qBAAqB,IAAI,OAAO,CAAC,IAAI,CAAC,CAW3D"}
@@ -21,8 +21,21 @@ function getRerankProvider() {
21
21
  function _resetRerankProviderForTest() {
22
22
  cached = null;
23
23
  }
24
+ async function disposeRerankProvider() {
25
+ if (!cached) return;
26
+ try {
27
+ const provider = await cached;
28
+ const disposable = provider;
29
+ if (typeof disposable.dispose === "function") await disposable.dispose();
30
+ } catch (err) {
31
+ log.debug("rerank dispose failed", { error: err instanceof Error ? err.message : String(err) });
32
+ } finally {
33
+ cached = null;
34
+ }
35
+ }
24
36
  export {
25
37
  _resetRerankProviderForTest,
38
+ disposeRerankProvider,
26
39
  getRerankProvider
27
40
  };
28
41
  //# sourceMappingURL=rerank-provider.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/providers/rerank-provider.ts"],"sourcesContent":["/**\n * Rerank provider interface — Phase 1 Task 1.3 of v1 engine overhaul.\n *\n * Phase 4 Part A switches the default factory to TransformersRerankProvider\n * (Transformers.js cross-encoder, in-process ONNX runtime). The legacy\n * Python FlashRank adapter still exists in `search/reranker/legacy-provider.ts`\n * pending Phase 4 Part B deletions, but it is no longer wired in.\n */\nimport { createLogger } from '../logger.js';\n\nconst log = createLogger('providers');\nexport interface RerankCandidate {\n id: string;\n text: string;\n}\n\nexport interface RerankResult {\n id: string;\n score: number;\n}\n\nexport interface RerankProvider {\n rerank(\n query: string,\n candidates: RerankCandidate[],\n topK?: number,\n ): Promise<RerankResult[]>;\n /** Model identifier (for cache invalidation / provenance). */\n readonly modelId: string;\n}\n\nlet cached: Promise<RerankProvider> | null = null;\n\nexport function getRerankProvider(): Promise<RerankProvider> {\n if (cached) return cached;\n cached = import('../search/reranker/transformers-rerank-provider.js')\n .then(async (m) => {\n const p = new m.TransformersRerankProvider();\n await p.warmup();\n log.info('rerank provider ready', {\n provider: 'rerank',\n impl: 'transformers',\n modelId: p.modelId,\n });\n return p;\n })\n .catch((err) => {\n cached = null;\n throw err;\n });\n return cached;\n}\n\nexport function _resetRerankProviderForTest(): void {\n cached = null;\n}\n"],"mappings":"AAQA,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,WAAW;AAqBpC,IAAI,SAAyC;AAEtC,SAAS,oBAA6C;AAC3D,MAAI,OAAQ,QAAO;AACnB,WAAS,OAAO,oDAAoD,EACjE,KAAK,OAAO,MAAM;AACjB,UAAM,IAAI,IAAI,EAAE,2BAA2B;AAC3C,UAAM,EAAE,OAAO;AACf,QAAI,KAAK,yBAAyB;AAAA,MAChC,UAAU;AAAA,MACV,MAAM;AAAA,MACN,SAAS,EAAE;AAAA,IACb,CAAC;AACD,WAAO;AAAA,EACT,CAAC,EACA,MAAM,CAAC,QAAQ;AACd,aAAS;AACT,UAAM;AAAA,EACR,CAAC;AACH,SAAO;AACT;AAEO,SAAS,8BAAoC;AAClD,WAAS;AACX;","names":[]}
1
+ {"version":3,"sources":["../../src/providers/rerank-provider.ts"],"sourcesContent":["/**\n * Rerank provider interface — Phase 1 Task 1.3 of v1 engine overhaul.\n *\n * Phase 4 Part A switches the default factory to TransformersRerankProvider\n * (Transformers.js cross-encoder, in-process ONNX runtime). The legacy\n * Python FlashRank adapter still exists in `search/reranker/legacy-provider.ts`\n * pending Phase 4 Part B deletions, but it is no longer wired in.\n */\nimport { createLogger } from '../logger.js';\n\nconst log = createLogger('providers');\nexport interface RerankCandidate {\n id: string;\n text: string;\n}\n\nexport interface RerankResult {\n id: string;\n score: number;\n}\n\nexport interface RerankProvider {\n rerank(\n query: string,\n candidates: RerankCandidate[],\n topK?: number,\n ): Promise<RerankResult[]>;\n /** Model identifier (for cache invalidation / provenance). */\n readonly modelId: string;\n}\n\nlet cached: Promise<RerankProvider> | null = null;\n\nexport function getRerankProvider(): Promise<RerankProvider> {\n if (cached) return cached;\n cached = import('../search/reranker/transformers-rerank-provider.js')\n .then(async (m) => {\n const p = new m.TransformersRerankProvider();\n await p.warmup();\n log.info('rerank provider ready', {\n provider: 'rerank',\n impl: 'transformers',\n modelId: p.modelId,\n });\n return p;\n })\n .catch((err) => {\n cached = null;\n throw err;\n });\n return cached;\n}\n\nexport function _resetRerankProviderForTest(): void {\n cached = null;\n}\n\n// Best-effort disposal of the cached rerank provider's native resources.\n// Called from CLI shutdown to release the ONNX session before process exit.\nexport async function disposeRerankProvider(): Promise<void> {\n if (!cached) return;\n try {\n const provider = await cached;\n const disposable = provider as unknown as { dispose?: () => Promise<void> };\n if (typeof disposable.dispose === 'function') await disposable.dispose();\n } catch (err) {\n log.debug('rerank dispose failed', { error: err instanceof Error ? err.message : String(err) });\n } finally {\n cached = null;\n }\n}\n"],"mappings":"AAQA,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,WAAW;AAqBpC,IAAI,SAAyC;AAEtC,SAAS,oBAA6C;AAC3D,MAAI,OAAQ,QAAO;AACnB,WAAS,OAAO,oDAAoD,EACjE,KAAK,OAAO,MAAM;AACjB,UAAM,IAAI,IAAI,EAAE,2BAA2B;AAC3C,UAAM,EAAE,OAAO;AACf,QAAI,KAAK,yBAAyB;AAAA,MAChC,UAAU;AAAA,MACV,MAAM;AAAA,MACN,SAAS,EAAE;AAAA,IACb,CAAC;AACD,WAAO;AAAA,EACT,CAAC,EACA,MAAM,CAAC,QAAQ;AACd,aAAS;AACT,UAAM;AAAA,EACR,CAAC;AACH,SAAO;AACT;AAEO,SAAS,8BAAoC;AAClD,WAAS;AACX;AAIA,eAAsB,wBAAuC;AAC3D,MAAI,CAAC,OAAQ;AACb,MAAI;AACF,UAAM,WAAW,MAAM;AACvB,UAAM,aAAa;AACnB,QAAI,OAAO,WAAW,YAAY,WAAY,OAAM,WAAW,QAAQ;AAAA,EACzE,SAAS,KAAK;AACZ,QAAI,MAAM,yBAAyB,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AAAA,EAChG,UAAE;AACA,aAAS;AAAA,EACX;AACF;","names":[]}
@@ -2,4 +2,5 @@ import type { ResearchBrief, ResearchSource, CrossReference } from '../types.js'
2
2
  import type { QueryType } from './decompose.js';
3
3
  export declare function buildResearchBrief(question: string, sources: ResearchSource[], subQueries: string[], perSourceCharCap: number, totalSourcesCharCap: number, queryType?: QueryType, comparisonEntities?: string[], synthesisText?: string): Promise<ResearchBrief>;
4
4
  export declare function detectCrossReferences(sources: ResearchSource[]): CrossReference[];
5
+ export declare function stripMarkdownLinks(text: string): string;
5
6
  //# sourceMappingURL=brief.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"brief.d.ts","sourceRoot":"","sources":["../../src/research/brief.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAoB,cAAc,EAAE,MAAM,aAAa,CAAC;AACnG,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAahD,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,cAAc,EAAE,EACzB,UAAU,EAAE,MAAM,EAAE,EACpB,gBAAgB,EAAE,MAAM,EACxB,mBAAmB,EAAE,MAAM,EAC3B,SAAS,GAAE,SAAqB,EAChC,kBAAkB,GAAE,MAAM,EAAO,EACjC,aAAa,CAAC,EAAE,MAAM,GACrB,OAAO,CAAC,aAAa,CAAC,CAgDxB;AA6BD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,cAAc,EAAE,GAAG,cAAc,EAAE,CAuCjF"}
1
+ {"version":3,"file":"brief.d.ts","sourceRoot":"","sources":["../../src/research/brief.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAoB,cAAc,EAAE,MAAM,aAAa,CAAC;AACnG,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAahD,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,cAAc,EAAE,EACzB,UAAU,EAAE,MAAM,EAAE,EACpB,gBAAgB,EAAE,MAAM,EACxB,mBAAmB,EAAE,MAAM,EAC3B,SAAS,GAAE,SAAqB,EAChC,kBAAkB,GAAE,MAAM,EAAO,EACjC,aAAa,CAAC,EAAE,MAAM,GACrB,OAAO,CAAC,aAAa,CAAC,CAgDxB;AA6BD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,cAAc,EAAE,GAAG,cAAc,EAAE,CAuCjF;AA6FD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAOvD"}
@@ -156,12 +156,15 @@ function firstSubstantiveParagraph(markdown) {
156
156
  for (const p of paragraphs) {
157
157
  if (p.length < 80) continue;
158
158
  if (p.startsWith("#") || p.startsWith("|") || p.startsWith("```")) continue;
159
- const stripped = p.replace(/^!\[[^\]]*\]\([^)]*\)\s*/g, "").replace(/^\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)\s*/g, "").trim();
160
- if (stripped.length < 80) continue;
161
- return stripped.replace(/\s+/g, " ");
159
+ const cleaned = stripMarkdownLinks(p);
160
+ if (cleaned.length < 80) continue;
161
+ return cleaned.replace(/\s+/g, " ");
162
162
  }
163
163
  return null;
164
164
  }
165
+ function stripMarkdownLinks(text) {
166
+ return text.replace(/!\[[^\]]*\]\([^)]*\)/g, "").replace(/\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)/g, "").replace(/\[([^\]]+)\]\([^)]*\)/g, "$1").replace(/<https?:\/\/[^>]+>/g, "").trim();
167
+ }
165
168
  function dedupe(list) {
166
169
  const seen = /* @__PURE__ */ new Set();
167
170
  const out = [];
@@ -237,6 +240,7 @@ const STOP_WORDS = /* @__PURE__ */ new Set([
237
240
  ]);
238
241
  export {
239
242
  buildResearchBrief,
240
- detectCrossReferences
243
+ detectCrossReferences,
244
+ stripMarkdownLinks
241
245
  };
242
246
  //# sourceMappingURL=brief.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/research/brief.ts"],"sourcesContent":["import type { ResearchBrief, ResearchSource, SearchResultItem, CrossReference } from '../types.js';\nimport type { QueryType } from './decompose.js';\nimport { extractHighlights } from '../search/highlights.js';\nimport { buildCitationGraph } from './citation-graph.js';\n\nconst MAX_HIGHLIGHTS = 12;\nconst MAX_KEY_FINDING_LEN = 280;\nconst MAX_TOPICS = 8;\nconst MAX_CROSS_REFS = 10;\nconst MIN_PHRASE_LEN = 4;\n\n// Build a host-LLM-friendly structured brief when internal sampling is\n// unavailable. The host model (Claude Code / Cursor / etc.) consumes this\n// shape to produce the final report without needing to re-read raw sources.\nexport async function buildResearchBrief(\n question: string,\n sources: ResearchSource[],\n subQueries: string[],\n perSourceCharCap: number,\n totalSourcesCharCap: number,\n queryType: QueryType = 'general',\n comparisonEntities: string[] = [],\n synthesisText?: string,\n): Promise<ResearchBrief> {\n const fetched = sources.filter((s) => s.fetched && s.markdown_content.length > 0);\n\n // Highlights reuse the ONNX-reranker-or-paragraph scorer so briefs align with\n // whatever format='highlights' produces for single-query searches.\n const searchItems: SearchResultItem[] = fetched.map((s) => ({\n title: s.title,\n url: s.url,\n snippet: s.markdown_content.slice(0, 200),\n markdown_content: s.markdown_content,\n relevance_score: s.relevance_score,\n }));\n\n const { highlights } = await extractHighlights(question, searchItems, MAX_HIGHLIGHTS);\n\n const topics = buildTopics(subQueries, fetched);\n const keyFindings = buildKeyFindings(fetched);\n const crossReferences = detectCrossReferences(fetched);\n const gaps = detectGaps(subQueries, fetched);\n\n const comparison = queryType === 'comparison' && comparisonEntities.length >= 2\n ? buildComparisonSection(comparisonEntities, fetched)\n : undefined;\n\n const citationGraph = synthesisText && synthesisText.trim().length > 0 && fetched.length > 0\n ? buildCitationGraph(\n synthesisText,\n fetched.map((s) => ({ url: s.url, title: s.title, markdown: s.markdown_content })),\n )\n : undefined;\n\n return {\n topics,\n highlights,\n key_findings: keyFindings,\n per_source_char_cap: perSourceCharCap,\n total_sources_char_cap: totalSourcesCharCap,\n sections: {\n overview: {\n key_findings: keyFindings.slice(0, 5),\n cross_references: crossReferences,\n },\n ...(comparison ? { comparison } : {}),\n gaps,\n },\n query_type: queryType,\n ...(citationGraph && citationGraph.length > 0 ? { citation_graph: citationGraph } : {}),\n };\n}\n\n// Prefer sub-queries (planner's view of the topic space) when available;\n// otherwise derive compact topic labels from source titles.\nfunction buildTopics(subQueries: string[], sources: ResearchSource[]): string[] {\n if (subQueries.length > 0) {\n return dedupe(subQueries).slice(0, MAX_TOPICS);\n }\n const labels = sources\n .map((s) => s.title.split(/[–|:·-]/)[0].trim())\n .filter((t) => t.length >= 5 && t.length <= 100);\n return dedupe(labels).slice(0, MAX_TOPICS);\n}\n\n// First substantive paragraph per source, trimmed to a finding-sized blurb.\n// Ordered by source relevance so the most-weighted finding is first.\nfunction buildKeyFindings(sources: ResearchSource[]): string[] {\n const out: string[] = [];\n for (const s of [...sources].sort((a, b) => b.relevance_score - a.relevance_score)) {\n const first = firstSubstantiveParagraph(s.markdown_content);\n if (!first) continue;\n const trimmed = first.length > MAX_KEY_FINDING_LEN\n ? first.slice(0, MAX_KEY_FINDING_LEN - 1).trimEnd() + '…'\n : first;\n out.push(trimmed);\n }\n return dedupe(out);\n}\n\nexport function detectCrossReferences(sources: ResearchSource[]): CrossReference[] {\n if (sources.length < 2) return [];\n\n // Extract significant phrases from each source's content\n const phraseMap = new Map<string, Set<number>>();\n\n for (let idx = 0; idx < sources.length; idx++) {\n const content = sources[idx].markdown_content.toLowerCase();\n const words = content\n .replace(/[^a-z0-9\\s]/g, ' ')\n .split(/\\s+/)\n .filter((w) => w.length >= MIN_PHRASE_LEN && !STOP_WORDS.has(w));\n\n const seenForSource = new Set<string>();\n for (let i = 0; i < words.length - 2; i++) {\n const phrase = words.slice(i, i + 3).join(' ');\n if (seenForSource.has(phrase)) continue;\n seenForSource.add(phrase);\n\n if (!phraseMap.has(phrase)) phraseMap.set(phrase, new Set());\n phraseMap.get(phrase)!.add(idx);\n }\n }\n\n // Phrases found in 2+ sources are cross-references\n const candidates: CrossReference[] = [];\n for (const [phrase, sourceIndices] of phraseMap) {\n if (sourceIndices.size >= 2) {\n candidates.push({\n finding: phrase,\n source_indices: [...sourceIndices].sort(),\n confidence: sourceIndices.size >= 3 ? 'high' : 'medium',\n });\n }\n }\n\n // Sort by number of sources (desc), then deduplicate overlapping phrases\n candidates.sort((a, b) => b.source_indices.length - a.source_indices.length);\n return deduplicateOverlapping(candidates).slice(0, MAX_CROSS_REFS);\n}\n\nfunction deduplicateOverlapping(refs: CrossReference[]): CrossReference[] {\n const kept: CrossReference[] = [];\n const usedWords = new Set<string>();\n\n for (const ref of refs) {\n const words = ref.finding.split(' ');\n // Skip if most words already covered by a higher-ranked cross-reference\n const overlapCount = words.filter((w) => usedWords.has(w)).length;\n if (overlapCount >= words.length - 1 && kept.length > 0) continue;\n\n kept.push(ref);\n for (const w of words) usedWords.add(w);\n }\n\n return kept;\n}\n\nfunction detectGaps(subQueries: string[], sources: ResearchSource[]): string[] {\n if (subQueries.length === 0) return [];\n\n const gaps: string[] = [];\n const contentLower = sources.map((s) => s.markdown_content.toLowerCase()).join(' ');\n\n for (const query of subQueries) {\n // Extract significant words from sub-query\n const words = query.toLowerCase()\n .replace(/[^a-z0-9\\s]/g, ' ')\n .split(/\\s+/)\n .filter((w) => w.length >= MIN_PHRASE_LEN && !STOP_WORDS.has(w));\n\n if (words.length === 0) continue;\n\n // Count how many significant words appear in any source\n const found = words.filter((w) => contentLower.includes(w)).length;\n const coverage = found / words.length;\n\n if (coverage < 0.5) {\n gaps.push(`Limited coverage for: \"${query}\"`);\n }\n }\n\n return gaps;\n}\n\nfunction buildComparisonSection(\n entities: string[],\n sources: ResearchSource[],\n): { entities: string[]; comparison_points: string[] } {\n const comparisonPoints: string[] = [];\n const contentLower = sources.map((s) => s.markdown_content.toLowerCase()).join('\\n');\n\n // Look for comparison keywords near entity mentions\n const comparisonTerms = ['faster', 'slower', 'better', 'worse', 'more', 'less',\n 'easier', 'harder', 'simpler', 'complex', 'lightweight', 'heavy',\n 'performance', 'scalability', 'ecosystem', 'community', 'support'];\n\n for (const term of comparisonTerms) {\n if (!contentLower.includes(term)) continue;\n\n // Check if term appears near any entity\n const nearEntity = entities.some((e) => {\n const entityLower = e.toLowerCase();\n const idx = contentLower.indexOf(entityLower);\n if (idx === -1) return false;\n // Check within 200 chars of entity mention\n const neighborhood = contentLower.slice(Math.max(0, idx - 200), idx + e.length + 200);\n return neighborhood.includes(term);\n });\n\n if (nearEntity) {\n comparisonPoints.push(term);\n }\n }\n\n return { entities, comparison_points: [...new Set(comparisonPoints)] };\n}\n\nfunction firstSubstantiveParagraph(markdown: string): string | null {\n const paragraphs = markdown.split(/\\n\\n+/).map((p) => p.trim());\n for (const p of paragraphs) {\n if (p.length < 80) continue;\n if (p.startsWith('#') || p.startsWith('|') || p.startsWith('```')) continue;\n // Strip leading images / links-around-images that pad alt text into the\n // paragraph; if nothing of substance remains, skip.\n const stripped = p\n .replace(/^!\\[[^\\]]*\\]\\([^)]*\\)\\s*/g, '')\n .replace(/^\\[!\\[[^\\]]*\\]\\([^)]*\\)\\]\\([^)]*\\)\\s*/g, '')\n .trim();\n if (stripped.length < 80) continue;\n return stripped.replace(/\\s+/g, ' ');\n }\n return null;\n}\n\nfunction dedupe(list: string[]): string[] {\n const seen = new Set<string>();\n const out: string[] = [];\n for (const item of list) {\n const key = item.toLowerCase();\n if (seen.has(key)) continue;\n seen.add(key);\n out.push(item);\n }\n return out;\n}\n\nconst STOP_WORDS = new Set([\n 'about', 'after', 'also', 'been', 'before', 'being', 'between',\n 'both', 'could', 'does', 'doing', 'done', 'each', 'even', 'every',\n 'from', 'have', 'here', 'into', 'just', 'like', 'made', 'make',\n 'many', 'more', 'most', 'much', 'must', 'need', 'only', 'other',\n 'over', 'same', 'should', 'some', 'such', 'than', 'that', 'their',\n 'them', 'then', 'there', 'these', 'they', 'this', 'those', 'through',\n 'very', 'want', 'well', 'were', 'what', 'when', 'where', 'which',\n 'while', 'will', 'with', 'would', 'your',\n]);\n"],"mappings":"AAEA,SAAS,yBAAyB;AAClC,SAAS,0BAA0B;AAEnC,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAC5B,MAAM,aAAa;AACnB,MAAM,iBAAiB;AACvB,MAAM,iBAAiB;AAKvB,eAAsB,mBACpB,UACA,SACA,YACA,kBACA,qBACA,YAAuB,WACvB,qBAA+B,CAAC,GAChC,eACwB;AACxB,QAAM,UAAU,QAAQ,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC;AAIhF,QAAM,cAAkC,QAAQ,IAAI,CAAC,OAAO;AAAA,IAC1D,OAAO,EAAE;AAAA,IACT,KAAK,EAAE;AAAA,IACP,SAAS,EAAE,iBAAiB,MAAM,GAAG,GAAG;AAAA,IACxC,kBAAkB,EAAE;AAAA,IACpB,iBAAiB,EAAE;AAAA,EACrB,EAAE;AAEF,QAAM,EAAE,WAAW,IAAI,MAAM,kBAAkB,UAAU,aAAa,cAAc;AAEpF,QAAM,SAAS,YAAY,YAAY,OAAO;AAC9C,QAAM,cAAc,iBAAiB,OAAO;AAC5C,QAAM,kBAAkB,sBAAsB,OAAO;AACrD,QAAM,OAAO,WAAW,YAAY,OAAO;AAE3C,QAAM,aAAa,cAAc,gBAAgB,mBAAmB,UAAU,IAC1E,uBAAuB,oBAAoB,OAAO,IAClD;AAEJ,QAAM,gBAAgB,iBAAiB,cAAc,KAAK,EAAE,SAAS,KAAK,QAAQ,SAAS,IACvF;AAAA,IACE;AAAA,IACA,QAAQ,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,UAAU,EAAE,iBAAiB,EAAE;AAAA,EACnF,IACA;AAEJ,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,cAAc;AAAA,IACd,qBAAqB;AAAA,IACrB,wBAAwB;AAAA,IACxB,UAAU;AAAA,MACR,UAAU;AAAA,QACR,cAAc,YAAY,MAAM,GAAG,CAAC;AAAA,QACpC,kBAAkB;AAAA,MACpB;AAAA,MACA,GAAI,aAAa,EAAE,WAAW,IAAI,CAAC;AAAA,MACnC;AAAA,IACF;AAAA,IACA,YAAY;AAAA,IACZ,GAAI,iBAAiB,cAAc,SAAS,IAAI,EAAE,gBAAgB,cAAc,IAAI,CAAC;AAAA,EACvF;AACF;AAIA,SAAS,YAAY,YAAsB,SAAqC;AAC9E,MAAI,WAAW,SAAS,GAAG;AACzB,WAAO,OAAO,UAAU,EAAE,MAAM,GAAG,UAAU;AAAA,EAC/C;AACA,QAAM,SAAS,QACZ,IAAI,CAAC,MAAM,EAAE,MAAM,MAAM,SAAS,EAAE,CAAC,EAAE,KAAK,CAAC,EAC7C,OAAO,CAAC,MAAM,EAAE,UAAU,KAAK,EAAE,UAAU,GAAG;AACjD,SAAO,OAAO,MAAM,EAAE,MAAM,GAAG,UAAU;AAC3C;AAIA,SAAS,iBAAiB,SAAqC;AAC7D,QAAM,MAAgB,CAAC;AACvB,aAAW,KAAK,CAAC,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,kBAAkB,EAAE,eAAe,GAAG;AAClF,UAAM,QAAQ,0BAA0B,EAAE,gBAAgB;AAC1D,QAAI,CAAC,MAAO;AACZ,UAAM,UAAU,MAAM,SAAS,sBAC3B,MAAM,MAAM,GAAG,sBAAsB,CAAC,EAAE,QAAQ,IAAI,WACpD;AACJ,QAAI,KAAK,OAAO;AAAA,EAClB;AACA,SAAO,OAAO,GAAG;AACnB;AAEO,SAAS,sBAAsB,SAA6C;AACjF,MAAI,QAAQ,SAAS,EAAG,QAAO,CAAC;AAGhC,QAAM,YAAY,oBAAI,IAAyB;AAE/C,WAAS,MAAM,GAAG,MAAM,QAAQ,QAAQ,OAAO;AAC7C,UAAM,UAAU,QAAQ,GAAG,EAAE,iBAAiB,YAAY;AAC1D,UAAM,QAAQ,QACX,QAAQ,gBAAgB,GAAG,EAC3B,MAAM,KAAK,EACX,OAAO,CAAC,MAAM,EAAE,UAAU,kBAAkB,CAAC,WAAW,IAAI,CAAC,CAAC;AAEjE,UAAM,gBAAgB,oBAAI,IAAY;AACtC,aAAS,IAAI,GAAG,IAAI,MAAM,SAAS,GAAG,KAAK;AACzC,YAAM,SAAS,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,KAAK,GAAG;AAC7C,UAAI,cAAc,IAAI,MAAM,EAAG;AAC/B,oBAAc,IAAI,MAAM;AAExB,UAAI,CAAC,UAAU,IAAI,MAAM,EAAG,WAAU,IAAI,QAAQ,oBAAI,IAAI,CAAC;AAC3D,gBAAU,IAAI,MAAM,EAAG,IAAI,GAAG;AAAA,IAChC;AAAA,EACF;AAGA,QAAM,aAA+B,CAAC;AACtC,aAAW,CAAC,QAAQ,aAAa,KAAK,WAAW;AAC/C,QAAI,cAAc,QAAQ,GAAG;AAC3B,iBAAW,KAAK;AAAA,QACd,SAAS;AAAA,QACT,gBAAgB,CAAC,GAAG,aAAa,EAAE,KAAK;AAAA,QACxC,YAAY,cAAc,QAAQ,IAAI,SAAS;AAAA,MACjD,CAAC;AAAA,IACH;AAAA,EACF;AAGA,aAAW,KAAK,CAAC,GAAG,MAAM,EAAE,eAAe,SAAS,EAAE,eAAe,MAAM;AAC3E,SAAO,uBAAuB,UAAU,EAAE,MAAM,GAAG,cAAc;AACnE;AAEA,SAAS,uBAAuB,MAA0C;AACxE,QAAM,OAAyB,CAAC;AAChC,QAAM,YAAY,oBAAI,IAAY;AAElC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,MAAM,GAAG;AAEnC,UAAM,eAAe,MAAM,OAAO,CAAC,MAAM,UAAU,IAAI,CAAC,CAAC,EAAE;AAC3D,QAAI,gBAAgB,MAAM,SAAS,KAAK,KAAK,SAAS,EAAG;AAEzD,SAAK,KAAK,GAAG;AACb,eAAW,KAAK,MAAO,WAAU,IAAI,CAAC;AAAA,EACxC;AAEA,SAAO;AACT;AAEA,SAAS,WAAW,YAAsB,SAAqC;AAC7E,MAAI,WAAW,WAAW,EAAG,QAAO,CAAC;AAErC,QAAM,OAAiB,CAAC;AACxB,QAAM,eAAe,QAAQ,IAAI,CAAC,MAAM,EAAE,iBAAiB,YAAY,CAAC,EAAE,KAAK,GAAG;AAElF,aAAW,SAAS,YAAY;AAE9B,UAAM,QAAQ,MAAM,YAAY,EAC7B,QAAQ,gBAAgB,GAAG,EAC3B,MAAM,KAAK,EACX,OAAO,CAAC,MAAM,EAAE,UAAU,kBAAkB,CAAC,WAAW,IAAI,CAAC,CAAC;AAEjE,QAAI,MAAM,WAAW,EAAG;AAGxB,UAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,aAAa,SAAS,CAAC,CAAC,EAAE;AAC5D,UAAM,WAAW,QAAQ,MAAM;AAE/B,QAAI,WAAW,KAAK;AAClB,WAAK,KAAK,0BAA0B,KAAK,GAAG;AAAA,IAC9C;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,uBACP,UACA,SACqD;AACrD,QAAM,mBAA6B,CAAC;AACpC,QAAM,eAAe,QAAQ,IAAI,CAAC,MAAM,EAAE,iBAAiB,YAAY,CAAC,EAAE,KAAK,IAAI;AAGnF,QAAM,kBAAkB;AAAA,IAAC;AAAA,IAAU;AAAA,IAAU;AAAA,IAAU;AAAA,IAAS;AAAA,IAAQ;AAAA,IACtE;AAAA,IAAU;AAAA,IAAU;AAAA,IAAW;AAAA,IAAW;AAAA,IAAe;AAAA,IACzD;AAAA,IAAe;AAAA,IAAe;AAAA,IAAa;AAAA,IAAa;AAAA,EAAS;AAEnE,aAAW,QAAQ,iBAAiB;AAClC,QAAI,CAAC,aAAa,SAAS,IAAI,EAAG;AAGlC,UAAM,aAAa,SAAS,KAAK,CAAC,MAAM;AACtC,YAAM,cAAc,EAAE,YAAY;AAClC,YAAM,MAAM,aAAa,QAAQ,WAAW;AAC5C,UAAI,QAAQ,GAAI,QAAO;AAEvB,YAAM,eAAe,aAAa,MAAM,KAAK,IAAI,GAAG,MAAM,GAAG,GAAG,MAAM,EAAE,SAAS,GAAG;AACpF,aAAO,aAAa,SAAS,IAAI;AAAA,IACnC,CAAC;AAED,QAAI,YAAY;AACd,uBAAiB,KAAK,IAAI;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,UAAU,mBAAmB,CAAC,GAAG,IAAI,IAAI,gBAAgB,CAAC,EAAE;AACvE;AAEA,SAAS,0BAA0B,UAAiC;AAClE,QAAM,aAAa,SAAS,MAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC;AAC9D,aAAW,KAAK,YAAY;AAC1B,QAAI,EAAE,SAAS,GAAI;AACnB,QAAI,EAAE,WAAW,GAAG,KAAK,EAAE,WAAW,GAAG,KAAK,EAAE,WAAW,KAAK,EAAG;AAGnE,UAAM,WAAW,EACd,QAAQ,6BAA6B,EAAE,EACvC,QAAQ,0CAA0C,EAAE,EACpD,KAAK;AACR,QAAI,SAAS,SAAS,GAAI;AAC1B,WAAO,SAAS,QAAQ,QAAQ,GAAG;AAAA,EACrC;AACA,SAAO;AACT;AAEA,SAAS,OAAO,MAA0B;AACxC,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,MAAM;AACvB,UAAM,MAAM,KAAK,YAAY;AAC7B,QAAI,KAAK,IAAI,GAAG,EAAG;AACnB,SAAK,IAAI,GAAG;AACZ,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,MAAM,aAAa,oBAAI,IAAI;AAAA,EACzB;AAAA,EAAS;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAU;AAAA,EAAS;AAAA,EACrD;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAC1D;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EACxD;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EACxD;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAU;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAC1D;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AAAA,EAC3D;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AAAA,EACzD;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AACpC,CAAC;","names":[]}
1
+ {"version":3,"sources":["../../src/research/brief.ts"],"sourcesContent":["import type { ResearchBrief, ResearchSource, SearchResultItem, CrossReference } from '../types.js';\nimport type { QueryType } from './decompose.js';\nimport { extractHighlights } from '../search/highlights.js';\nimport { buildCitationGraph } from './citation-graph.js';\n\nconst MAX_HIGHLIGHTS = 12;\nconst MAX_KEY_FINDING_LEN = 280;\nconst MAX_TOPICS = 8;\nconst MAX_CROSS_REFS = 10;\nconst MIN_PHRASE_LEN = 4;\n\n// Build a host-LLM-friendly structured brief when internal sampling is\n// unavailable. The host model (Claude Code / Cursor / etc.) consumes this\n// shape to produce the final report without needing to re-read raw sources.\nexport async function buildResearchBrief(\n question: string,\n sources: ResearchSource[],\n subQueries: string[],\n perSourceCharCap: number,\n totalSourcesCharCap: number,\n queryType: QueryType = 'general',\n comparisonEntities: string[] = [],\n synthesisText?: string,\n): Promise<ResearchBrief> {\n const fetched = sources.filter((s) => s.fetched && s.markdown_content.length > 0);\n\n // Highlights reuse the ONNX-reranker-or-paragraph scorer so briefs align with\n // whatever format='highlights' produces for single-query searches.\n const searchItems: SearchResultItem[] = fetched.map((s) => ({\n title: s.title,\n url: s.url,\n snippet: s.markdown_content.slice(0, 200),\n markdown_content: s.markdown_content,\n relevance_score: s.relevance_score,\n }));\n\n const { highlights } = await extractHighlights(question, searchItems, MAX_HIGHLIGHTS);\n\n const topics = buildTopics(subQueries, fetched);\n const keyFindings = buildKeyFindings(fetched);\n const crossReferences = detectCrossReferences(fetched);\n const gaps = detectGaps(subQueries, fetched);\n\n const comparison = queryType === 'comparison' && comparisonEntities.length >= 2\n ? buildComparisonSection(comparisonEntities, fetched)\n : undefined;\n\n const citationGraph = synthesisText && synthesisText.trim().length > 0 && fetched.length > 0\n ? buildCitationGraph(\n synthesisText,\n fetched.map((s) => ({ url: s.url, title: s.title, markdown: s.markdown_content })),\n )\n : undefined;\n\n return {\n topics,\n highlights,\n key_findings: keyFindings,\n per_source_char_cap: perSourceCharCap,\n total_sources_char_cap: totalSourcesCharCap,\n sections: {\n overview: {\n key_findings: keyFindings.slice(0, 5),\n cross_references: crossReferences,\n },\n ...(comparison ? { comparison } : {}),\n gaps,\n },\n query_type: queryType,\n ...(citationGraph && citationGraph.length > 0 ? { citation_graph: citationGraph } : {}),\n };\n}\n\n// Prefer sub-queries (planner's view of the topic space) when available;\n// otherwise derive compact topic labels from source titles.\nfunction buildTopics(subQueries: string[], sources: ResearchSource[]): string[] {\n if (subQueries.length > 0) {\n return dedupe(subQueries).slice(0, MAX_TOPICS);\n }\n const labels = sources\n .map((s) => s.title.split(/[–|:·-]/)[0].trim())\n .filter((t) => t.length >= 5 && t.length <= 100);\n return dedupe(labels).slice(0, MAX_TOPICS);\n}\n\n// First substantive paragraph per source, trimmed to a finding-sized blurb.\n// Ordered by source relevance so the most-weighted finding is first.\nfunction buildKeyFindings(sources: ResearchSource[]): string[] {\n const out: string[] = [];\n for (const s of [...sources].sort((a, b) => b.relevance_score - a.relevance_score)) {\n const first = firstSubstantiveParagraph(s.markdown_content);\n if (!first) continue;\n const trimmed = first.length > MAX_KEY_FINDING_LEN\n ? first.slice(0, MAX_KEY_FINDING_LEN - 1).trimEnd() + '…'\n : first;\n out.push(trimmed);\n }\n return dedupe(out);\n}\n\nexport function detectCrossReferences(sources: ResearchSource[]): CrossReference[] {\n if (sources.length < 2) return [];\n\n // Extract significant phrases from each source's content\n const phraseMap = new Map<string, Set<number>>();\n\n for (let idx = 0; idx < sources.length; idx++) {\n const content = sources[idx].markdown_content.toLowerCase();\n const words = content\n .replace(/[^a-z0-9\\s]/g, ' ')\n .split(/\\s+/)\n .filter((w) => w.length >= MIN_PHRASE_LEN && !STOP_WORDS.has(w));\n\n const seenForSource = new Set<string>();\n for (let i = 0; i < words.length - 2; i++) {\n const phrase = words.slice(i, i + 3).join(' ');\n if (seenForSource.has(phrase)) continue;\n seenForSource.add(phrase);\n\n if (!phraseMap.has(phrase)) phraseMap.set(phrase, new Set());\n phraseMap.get(phrase)!.add(idx);\n }\n }\n\n // Phrases found in 2+ sources are cross-references\n const candidates: CrossReference[] = [];\n for (const [phrase, sourceIndices] of phraseMap) {\n if (sourceIndices.size >= 2) {\n candidates.push({\n finding: phrase,\n source_indices: [...sourceIndices].sort(),\n confidence: sourceIndices.size >= 3 ? 'high' : 'medium',\n });\n }\n }\n\n // Sort by number of sources (desc), then deduplicate overlapping phrases\n candidates.sort((a, b) => b.source_indices.length - a.source_indices.length);\n return deduplicateOverlapping(candidates).slice(0, MAX_CROSS_REFS);\n}\n\nfunction deduplicateOverlapping(refs: CrossReference[]): CrossReference[] {\n const kept: CrossReference[] = [];\n const usedWords = new Set<string>();\n\n for (const ref of refs) {\n const words = ref.finding.split(' ');\n // Skip if most words already covered by a higher-ranked cross-reference\n const overlapCount = words.filter((w) => usedWords.has(w)).length;\n if (overlapCount >= words.length - 1 && kept.length > 0) continue;\n\n kept.push(ref);\n for (const w of words) usedWords.add(w);\n }\n\n return kept;\n}\n\nfunction detectGaps(subQueries: string[], sources: ResearchSource[]): string[] {\n if (subQueries.length === 0) return [];\n\n const gaps: string[] = [];\n const contentLower = sources.map((s) => s.markdown_content.toLowerCase()).join(' ');\n\n for (const query of subQueries) {\n // Extract significant words from sub-query\n const words = query.toLowerCase()\n .replace(/[^a-z0-9\\s]/g, ' ')\n .split(/\\s+/)\n .filter((w) => w.length >= MIN_PHRASE_LEN && !STOP_WORDS.has(w));\n\n if (words.length === 0) continue;\n\n // Count how many significant words appear in any source\n const found = words.filter((w) => contentLower.includes(w)).length;\n const coverage = found / words.length;\n\n if (coverage < 0.5) {\n gaps.push(`Limited coverage for: \"${query}\"`);\n }\n }\n\n return gaps;\n}\n\nfunction buildComparisonSection(\n entities: string[],\n sources: ResearchSource[],\n): { entities: string[]; comparison_points: string[] } {\n const comparisonPoints: string[] = [];\n const contentLower = sources.map((s) => s.markdown_content.toLowerCase()).join('\\n');\n\n // Look for comparison keywords near entity mentions\n const comparisonTerms = ['faster', 'slower', 'better', 'worse', 'more', 'less',\n 'easier', 'harder', 'simpler', 'complex', 'lightweight', 'heavy',\n 'performance', 'scalability', 'ecosystem', 'community', 'support'];\n\n for (const term of comparisonTerms) {\n if (!contentLower.includes(term)) continue;\n\n // Check if term appears near any entity\n const nearEntity = entities.some((e) => {\n const entityLower = e.toLowerCase();\n const idx = contentLower.indexOf(entityLower);\n if (idx === -1) return false;\n // Check within 200 chars of entity mention\n const neighborhood = contentLower.slice(Math.max(0, idx - 200), idx + e.length + 200);\n return neighborhood.includes(term);\n });\n\n if (nearEntity) {\n comparisonPoints.push(term);\n }\n }\n\n return { entities, comparison_points: [...new Set(comparisonPoints)] };\n}\n\nfunction firstSubstantiveParagraph(markdown: string): string | null {\n const paragraphs = markdown.split(/\\n\\n+/).map((p) => p.trim());\n for (const p of paragraphs) {\n if (p.length < 80) continue;\n if (p.startsWith('#') || p.startsWith('|') || p.startsWith('```')) continue;\n const cleaned = stripMarkdownLinks(p);\n if (cleaned.length < 80) continue;\n return cleaned.replace(/\\s+/g, ' ');\n }\n return null;\n}\n\n// Flatten markdown link/image syntax to plain text so a downstream char-slice\n// can't chop mid-link and leak `](/?source=post_page...` into key_findings.\nexport function stripMarkdownLinks(text: string): string {\n return text\n .replace(/!\\[[^\\]]*\\]\\([^)]*\\)/g, '')\n .replace(/\\[!\\[[^\\]]*\\]\\([^)]*\\)\\]\\([^)]*\\)/g, '')\n .replace(/\\[([^\\]]+)\\]\\([^)]*\\)/g, '$1')\n .replace(/<https?:\\/\\/[^>]+>/g, '')\n .trim();\n}\n\nfunction dedupe(list: string[]): string[] {\n const seen = new Set<string>();\n const out: string[] = [];\n for (const item of list) {\n const key = item.toLowerCase();\n if (seen.has(key)) continue;\n seen.add(key);\n out.push(item);\n }\n return out;\n}\n\nconst STOP_WORDS = new Set([\n 'about', 'after', 'also', 'been', 'before', 'being', 'between',\n 'both', 'could', 'does', 'doing', 'done', 'each', 'even', 'every',\n 'from', 'have', 'here', 'into', 'just', 'like', 'made', 'make',\n 'many', 'more', 'most', 'much', 'must', 'need', 'only', 'other',\n 'over', 'same', 'should', 'some', 'such', 'than', 'that', 'their',\n 'them', 'then', 'there', 'these', 'they', 'this', 'those', 'through',\n 'very', 'want', 'well', 'were', 'what', 'when', 'where', 'which',\n 'while', 'will', 'with', 'would', 'your',\n]);\n"],"mappings":"AAEA,SAAS,yBAAyB;AAClC,SAAS,0BAA0B;AAEnC,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAC5B,MAAM,aAAa;AACnB,MAAM,iBAAiB;AACvB,MAAM,iBAAiB;AAKvB,eAAsB,mBACpB,UACA,SACA,YACA,kBACA,qBACA,YAAuB,WACvB,qBAA+B,CAAC,GAChC,eACwB;AACxB,QAAM,UAAU,QAAQ,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC;AAIhF,QAAM,cAAkC,QAAQ,IAAI,CAAC,OAAO;AAAA,IAC1D,OAAO,EAAE;AAAA,IACT,KAAK,EAAE;AAAA,IACP,SAAS,EAAE,iBAAiB,MAAM,GAAG,GAAG;AAAA,IACxC,kBAAkB,EAAE;AAAA,IACpB,iBAAiB,EAAE;AAAA,EACrB,EAAE;AAEF,QAAM,EAAE,WAAW,IAAI,MAAM,kBAAkB,UAAU,aAAa,cAAc;AAEpF,QAAM,SAAS,YAAY,YAAY,OAAO;AAC9C,QAAM,cAAc,iBAAiB,OAAO;AAC5C,QAAM,kBAAkB,sBAAsB,OAAO;AACrD,QAAM,OAAO,WAAW,YAAY,OAAO;AAE3C,QAAM,aAAa,cAAc,gBAAgB,mBAAmB,UAAU,IAC1E,uBAAuB,oBAAoB,OAAO,IAClD;AAEJ,QAAM,gBAAgB,iBAAiB,cAAc,KAAK,EAAE,SAAS,KAAK,QAAQ,SAAS,IACvF;AAAA,IACE;AAAA,IACA,QAAQ,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,UAAU,EAAE,iBAAiB,EAAE;AAAA,EACnF,IACA;AAEJ,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,cAAc;AAAA,IACd,qBAAqB;AAAA,IACrB,wBAAwB;AAAA,IACxB,UAAU;AAAA,MACR,UAAU;AAAA,QACR,cAAc,YAAY,MAAM,GAAG,CAAC;AAAA,QACpC,kBAAkB;AAAA,MACpB;AAAA,MACA,GAAI,aAAa,EAAE,WAAW,IAAI,CAAC;AAAA,MACnC;AAAA,IACF;AAAA,IACA,YAAY;AAAA,IACZ,GAAI,iBAAiB,cAAc,SAAS,IAAI,EAAE,gBAAgB,cAAc,IAAI,CAAC;AAAA,EACvF;AACF;AAIA,SAAS,YAAY,YAAsB,SAAqC;AAC9E,MAAI,WAAW,SAAS,GAAG;AACzB,WAAO,OAAO,UAAU,EAAE,MAAM,GAAG,UAAU;AAAA,EAC/C;AACA,QAAM,SAAS,QACZ,IAAI,CAAC,MAAM,EAAE,MAAM,MAAM,SAAS,EAAE,CAAC,EAAE,KAAK,CAAC,EAC7C,OAAO,CAAC,MAAM,EAAE,UAAU,KAAK,EAAE,UAAU,GAAG;AACjD,SAAO,OAAO,MAAM,EAAE,MAAM,GAAG,UAAU;AAC3C;AAIA,SAAS,iBAAiB,SAAqC;AAC7D,QAAM,MAAgB,CAAC;AACvB,aAAW,KAAK,CAAC,GAAG,OAAO,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,kBAAkB,EAAE,eAAe,GAAG;AAClF,UAAM,QAAQ,0BAA0B,EAAE,gBAAgB;AAC1D,QAAI,CAAC,MAAO;AACZ,UAAM,UAAU,MAAM,SAAS,sBAC3B,MAAM,MAAM,GAAG,sBAAsB,CAAC,EAAE,QAAQ,IAAI,WACpD;AACJ,QAAI,KAAK,OAAO;AAAA,EAClB;AACA,SAAO,OAAO,GAAG;AACnB;AAEO,SAAS,sBAAsB,SAA6C;AACjF,MAAI,QAAQ,SAAS,EAAG,QAAO,CAAC;AAGhC,QAAM,YAAY,oBAAI,IAAyB;AAE/C,WAAS,MAAM,GAAG,MAAM,QAAQ,QAAQ,OAAO;AAC7C,UAAM,UAAU,QAAQ,GAAG,EAAE,iBAAiB,YAAY;AAC1D,UAAM,QAAQ,QACX,QAAQ,gBAAgB,GAAG,EAC3B,MAAM,KAAK,EACX,OAAO,CAAC,MAAM,EAAE,UAAU,kBAAkB,CAAC,WAAW,IAAI,CAAC,CAAC;AAEjE,UAAM,gBAAgB,oBAAI,IAAY;AACtC,aAAS,IAAI,GAAG,IAAI,MAAM,SAAS,GAAG,KAAK;AACzC,YAAM,SAAS,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,KAAK,GAAG;AAC7C,UAAI,cAAc,IAAI,MAAM,EAAG;AAC/B,oBAAc,IAAI,MAAM;AAExB,UAAI,CAAC,UAAU,IAAI,MAAM,EAAG,WAAU,IAAI,QAAQ,oBAAI,IAAI,CAAC;AAC3D,gBAAU,IAAI,MAAM,EAAG,IAAI,GAAG;AAAA,IAChC;AAAA,EACF;AAGA,QAAM,aAA+B,CAAC;AACtC,aAAW,CAAC,QAAQ,aAAa,KAAK,WAAW;AAC/C,QAAI,cAAc,QAAQ,GAAG;AAC3B,iBAAW,KAAK;AAAA,QACd,SAAS;AAAA,QACT,gBAAgB,CAAC,GAAG,aAAa,EAAE,KAAK;AAAA,QACxC,YAAY,cAAc,QAAQ,IAAI,SAAS;AAAA,MACjD,CAAC;AAAA,IACH;AAAA,EACF;AAGA,aAAW,KAAK,CAAC,GAAG,MAAM,EAAE,eAAe,SAAS,EAAE,eAAe,MAAM;AAC3E,SAAO,uBAAuB,UAAU,EAAE,MAAM,GAAG,cAAc;AACnE;AAEA,SAAS,uBAAuB,MAA0C;AACxE,QAAM,OAAyB,CAAC;AAChC,QAAM,YAAY,oBAAI,IAAY;AAElC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,MAAM,GAAG;AAEnC,UAAM,eAAe,MAAM,OAAO,CAAC,MAAM,UAAU,IAAI,CAAC,CAAC,EAAE;AAC3D,QAAI,gBAAgB,MAAM,SAAS,KAAK,KAAK,SAAS,EAAG;AAEzD,SAAK,KAAK,GAAG;AACb,eAAW,KAAK,MAAO,WAAU,IAAI,CAAC;AAAA,EACxC;AAEA,SAAO;AACT;AAEA,SAAS,WAAW,YAAsB,SAAqC;AAC7E,MAAI,WAAW,WAAW,EAAG,QAAO,CAAC;AAErC,QAAM,OAAiB,CAAC;AACxB,QAAM,eAAe,QAAQ,IAAI,CAAC,MAAM,EAAE,iBAAiB,YAAY,CAAC,EAAE,KAAK,GAAG;AAElF,aAAW,SAAS,YAAY;AAE9B,UAAM,QAAQ,MAAM,YAAY,EAC7B,QAAQ,gBAAgB,GAAG,EAC3B,MAAM,KAAK,EACX,OAAO,CAAC,MAAM,EAAE,UAAU,kBAAkB,CAAC,WAAW,IAAI,CAAC,CAAC;AAEjE,QAAI,MAAM,WAAW,EAAG;AAGxB,UAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,aAAa,SAAS,CAAC,CAAC,EAAE;AAC5D,UAAM,WAAW,QAAQ,MAAM;AAE/B,QAAI,WAAW,KAAK;AAClB,WAAK,KAAK,0BAA0B,KAAK,GAAG;AAAA,IAC9C;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,uBACP,UACA,SACqD;AACrD,QAAM,mBAA6B,CAAC;AACpC,QAAM,eAAe,QAAQ,IAAI,CAAC,MAAM,EAAE,iBAAiB,YAAY,CAAC,EAAE,KAAK,IAAI;AAGnF,QAAM,kBAAkB;AAAA,IAAC;AAAA,IAAU;AAAA,IAAU;AAAA,IAAU;AAAA,IAAS;AAAA,IAAQ;AAAA,IACtE;AAAA,IAAU;AAAA,IAAU;AAAA,IAAW;AAAA,IAAW;AAAA,IAAe;AAAA,IACzD;AAAA,IAAe;AAAA,IAAe;AAAA,IAAa;AAAA,IAAa;AAAA,EAAS;AAEnE,aAAW,QAAQ,iBAAiB;AAClC,QAAI,CAAC,aAAa,SAAS,IAAI,EAAG;AAGlC,UAAM,aAAa,SAAS,KAAK,CAAC,MAAM;AACtC,YAAM,cAAc,EAAE,YAAY;AAClC,YAAM,MAAM,aAAa,QAAQ,WAAW;AAC5C,UAAI,QAAQ,GAAI,QAAO;AAEvB,YAAM,eAAe,aAAa,MAAM,KAAK,IAAI,GAAG,MAAM,GAAG,GAAG,MAAM,EAAE,SAAS,GAAG;AACpF,aAAO,aAAa,SAAS,IAAI;AAAA,IACnC,CAAC;AAED,QAAI,YAAY;AACd,uBAAiB,KAAK,IAAI;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,UAAU,mBAAmB,CAAC,GAAG,IAAI,IAAI,gBAAgB,CAAC,EAAE;AACvE;AAEA,SAAS,0BAA0B,UAAiC;AAClE,QAAM,aAAa,SAAS,MAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC;AAC9D,aAAW,KAAK,YAAY;AAC1B,QAAI,EAAE,SAAS,GAAI;AACnB,QAAI,EAAE,WAAW,GAAG,KAAK,EAAE,WAAW,GAAG,KAAK,EAAE,WAAW,KAAK,EAAG;AACnE,UAAM,UAAU,mBAAmB,CAAC;AACpC,QAAI,QAAQ,SAAS,GAAI;AACzB,WAAO,QAAQ,QAAQ,QAAQ,GAAG;AAAA,EACpC;AACA,SAAO;AACT;AAIO,SAAS,mBAAmB,MAAsB;AACvD,SAAO,KACJ,QAAQ,yBAAyB,EAAE,EACnC,QAAQ,sCAAsC,EAAE,EAChD,QAAQ,0BAA0B,IAAI,EACtC,QAAQ,uBAAuB,EAAE,EACjC,KAAK;AACV;AAEA,SAAS,OAAO,MAA0B;AACxC,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,MAAgB,CAAC;AACvB,aAAW,QAAQ,MAAM;AACvB,UAAM,MAAM,KAAK,YAAY;AAC7B,QAAI,KAAK,IAAI,GAAG,EAAG;AACnB,SAAK,IAAI,GAAG;AACZ,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO;AACT;AAEA,MAAM,aAAa,oBAAI,IAAI;AAAA,EACzB;AAAA,EAAS;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAU;AAAA,EAAS;AAAA,EACrD;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAC1D;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EACxD;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EACxD;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAU;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAC1D;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AAAA,EAC3D;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AAAA,EACzD;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAQ;AAAA,EAAS;AACpC,CAAC;","names":[]}
@@ -12,7 +12,7 @@ import { truncateSmartly } from "../search/truncate.js";
12
12
  import { cacheContent } from "../cache/store.js";
13
13
  import { getEmbeddingService } from "../embedding/embed.js";
14
14
  import { checkSamplingSupport } from "../search/sampling.js";
15
- import { isLocalLlmEnabled } from "../extraction/v1/local-llm.js";
15
+ import { isLlmConfigured as isLocalLlmEnabled } from "../integrations/cloud/llm/run.js";
16
16
  const log = createLogger("research");
17
17
  const DEPTH_CONFIG = {
18
18
  quick: { subQueries: 2, minSources: 5, maxSources: 8 },
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/research/pipeline.ts"],"sourcesContent":["import { createLogger } from '../logger.js';\nimport { decomposeQuestion, detectQueryType, extractComparisonEntities, type QueryType } from './decompose.js';\nimport { synthesizeReport } from './synthesize.js';\nimport { synthesizeLocal } from './synthesis-local.js';\nimport { buildResearchBrief } from './brief.js';\nimport { deduplicateResults } from '../search/dedup.js';\nimport { rerankResults } from '../search/rerank.js';\nimport { applyAllFilters } from '../search/filters.js';\nimport { exploreInParallel } from './branch-exploration.js';\nimport type { RawSearchResult, SearchEngineOptions } from '../types.js';\nimport { getExtractProvider } from '../providers/extract-provider.js';\nimport { truncateSmartly } from '../search/truncate.js';\nimport { cacheContent } from '../cache/store.js';\nimport { getEmbeddingService } from '../embedding/embed.js';\nimport { checkSamplingSupport, type SamplingCapableServer } from '../search/sampling.js';\nimport { isLocalLlmEnabled } from '../extraction/v1/local-llm.js';\nimport type {\n ResearchInput,\n ResearchOutput,\n ResearchSource,\n SearchEngine,\n Citation,\n} from '../types.js';\nimport type { SmartRouter } from '../fetch/router.js';\n\nconst log = createLogger('research');\n\nconst DEPTH_CONFIG: Record<string, { subQueries: number; minSources: number; maxSources: number }> = {\n quick: { subQueries: 2, minSources: 5, maxSources: 8 },\n standard: { subQueries: 4, minSources: 10, maxSources: 15 },\n comprehensive: { subQueries: 7, minSources: 20, maxSources: 25 },\n};\n\n// Per-depth budgets for the sub-query fan-out. exploreInParallel guarantees\n// a single slow sub-query can't burn the whole research budget — comprehensive\n// runs cap at ~60s total and 15s per sub-query.\nconst SEARCH_TOTAL_BUDGET_MS: Record<string, number> = {\n quick: 15_000,\n standard: 30_000,\n comprehensive: 60_000,\n};\nconst SEARCH_PER_QUERY_BUDGET_MS: Record<string, number> = {\n quick: 8_000,\n standard: 10_000,\n comprehensive: 15_000,\n};\n\nconst PER_SOURCE_CHAR_CAP = 3000;\nconst TOTAL_SOURCES_CHAR_CAP = 40000;\n\nexport async function runResearchPipeline(\n input: ResearchInput,\n engines: SearchEngine[],\n router: SmartRouter,\n server?: SamplingCapableServer,\n): Promise<ResearchOutput> {\n const start = Date.now();\n const depth = input.depth ?? 'standard';\n const config = DEPTH_CONFIG[depth] ?? DEPTH_CONFIG.standard;\n const maxSources = input.max_sources ?? config.maxSources;\n\n try {\n // Phase 1: Decompose question into sub-queries\n log.info('research pipeline started', { question: input.question, depth });\n const decomposeResult = await decomposeQuestion(\n input.question,\n depth as 'quick' | 'standard' | 'comprehensive',\n server,\n );\n const subQueries = decomposeResult.subQueries;\n const queryType = decomposeResult.queryType;\n log.info('decomposition complete', { subQueryCount: subQueries.length, samplingUsed: decomposeResult.samplingUsed, queryType });\n\n // Phase 2: Parallel search across sub-queries with per-query + total\n // budget enforcement via exploreInParallel. A single hung engine no\n // longer wedges the whole research call — the per-query timer aborts\n // it and the rest of the fan-out keeps going. Engine cap when\n // sub-queries are many preserves the multi-query.ts invariant.\n const effEngines = subQueries.length >= 3 && engines.length > 2 ? engines.slice(0, 2) : engines;\n const perEngineMaxResults = Math.ceil(maxSources / subQueries.length) * 2;\n\n const branchResults = await exploreInParallel(\n subQueries,\n async (subQuery, signal) => {\n const results: RawSearchResult[] = [];\n const usedHere = new Set<string>();\n const engineOpts: SearchEngineOptions = {\n maxResults: perEngineMaxResults,\n includeDomains: input.include_domains,\n excludeDomains: input.exclude_domains,\n };\n\n await Promise.allSettled(\n effEngines.map(async (engine) => {\n if (signal.aborted) return;\n try {\n const rs = await engine.search(subQuery, engineOpts);\n for (const r of rs) results.push(r);\n usedHere.add(engine.name);\n } catch (err) {\n log.warn('research engine search failed', {\n engine: engine.name,\n query: subQuery,\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }),\n );\n\n return { results, enginesUsed: [...usedHere] };\n },\n {\n maxConcurrent: 3,\n totalBudgetMs: SEARCH_TOTAL_BUDGET_MS[depth] ?? SEARCH_TOTAL_BUDGET_MS.standard,\n perQueryBudgetMs: SEARCH_PER_QUERY_BUDGET_MS[depth] ?? SEARCH_PER_QUERY_BUDGET_MS.standard,\n },\n );\n\n const allRaw: RawSearchResult[] = [];\n const enginesUsed = new Set<string>();\n const searchErrors: string[] = [];\n for (const br of branchResults) {\n if (br.ok && br.result) {\n allRaw.push(...br.result.results);\n for (const e of br.result.enginesUsed) enginesUsed.add(e);\n } else if (br.error) {\n searchErrors.push(`${br.query}: ${br.error}`);\n }\n }\n if (searchErrors.length > 0) {\n log.warn('some search sub-queries failed', { errors: searchErrors });\n }\n\n log.info('search phase complete', { totalRaw: allRaw.length, engines: [...enginesUsed] });\n\n // Phase 3: Deduplicate, filter, rerank\n let merged = deduplicateResults(allRaw);\n\n merged = applyAllFilters(merged, {\n includeDomains: input.include_domains,\n excludeDomains: input.exclude_domains,\n });\n\n merged = await rerankResults(input.question, merged);\n merged = merged.slice(0, maxSources);\n\n if (merged.length === 0) {\n return {\n report: `## Research: ${input.question}\\n\\nNo sources could be found for this query.`,\n citations: [],\n sources: [],\n sub_queries: subQueries,\n depth,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n };\n }\n\n // Phase 4: Fetch top sources in parallel\n const sources: ResearchSource[] = await fetchSources(merged, router, maxSources);\n applySourceBudget(sources, PER_SOURCE_CHAR_CAP, TOTAL_SOURCES_CHAR_CAP);\n log.info('fetch phase complete', {\n fetched: sources.filter((s) => s.fetched).length,\n failed: sources.filter((s) => !s.fetched).length,\n });\n\n // Phase 5: Synthesize report\n const synthesisResult = await synthesizeReport(\n input.question,\n sources,\n depth as 'quick' | 'standard' | 'comprehensive',\n server,\n );\n log.info('synthesis complete', { samplingUsed: synthesisResult.samplingUsed, reportLength: synthesisResult.report.length });\n\n // Phase 5b: Local-LLM synthesis fallback — only when host LLM did not\n // produce output AND a local provider is configured. Failures fall through\n // to the existing heuristic report in synthesisResult.\n let finalReport = synthesisResult.report;\n let finalCitations: Citation[] = synthesisResult.citations;\n let localSynthesisText: string | undefined;\n if (!synthesisResult.samplingUsed && isLocalLlmEnabled()) {\n try {\n const localSources = sources\n .filter((s) => s.fetched && s.markdown_content.length > 0)\n .map((s) => ({ url: s.url, title: s.title, markdown: s.markdown_content }));\n if (localSources.length > 0) {\n const local = await synthesizeLocal(input.question, localSources);\n finalReport = local.text;\n localSynthesisText = local.text;\n finalCitations = local.citations\n .filter((idx) => idx >= 0 && idx < localSources.length)\n .map((idx) => {\n const s = localSources[idx];\n return {\n index: idx + 1,\n url: s.url,\n title: s.title,\n snippet: s.markdown.slice(0, 200),\n };\n });\n log.info('local synthesis succeeded', { reportLength: finalReport.length });\n }\n } catch (err) {\n log.warn('local LLM synthesis failed; using heuristic fallback', {\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n // Phase 6: Structured brief — populated when internal sampling is\n // unavailable so the host LLM has well-shaped data to write the report\n // from without re-reading raw markdown.\n const comparisonEntities = queryType === 'comparison'\n ? extractComparisonEntities(input.question).entities\n : [];\n const brief = !synthesisResult.samplingUsed\n ? await buildResearchBrief(\n input.question,\n sources,\n subQueries,\n PER_SOURCE_CHAR_CAP,\n TOTAL_SOURCES_CHAR_CAP,\n queryType,\n comparisonEntities,\n localSynthesisText,\n )\n : undefined;\n\n return {\n report: finalReport,\n citations: finalCitations,\n sources,\n sub_queries: subQueries,\n depth,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n ...(brief ? { brief } : {}),\n };\n } catch (err) {\n log.error('research pipeline failed', {\n question: input.question,\n error: err instanceof Error ? err.message : String(err),\n });\n return {\n report: '',\n citations: [],\n sources: [],\n sub_queries: [],\n depth,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n\ninterface MergedResult {\n title: string;\n url: string;\n snippet: string;\n relevance_score: number;\n engines: string[];\n}\n\nasync function fetchSources(\n merged: MergedResult[],\n router: SmartRouter,\n maxSources: number,\n): Promise<ResearchSource[]> {\n const fetchPromises = merged.slice(0, maxSources).map(async (result): Promise<ResearchSource> => {\n try {\n const raw = await Promise.race([\n router.fetch(result.url, { renderJs: 'auto' }),\n new Promise<never>((_, reject) =>\n setTimeout(() => reject(new Error('fetch timeout')), 15000),\n ),\n ]);\n\n const extractor = await getExtractProvider();\n const extraction = await extractor.extract(raw.html, raw.finalUrl, {\n maxChars: 30000,\n contentType: raw.contentType,\n });\n const truncated = truncateSmartly(extraction.markdown, PER_SOURCE_CHAR_CAP);\n\n try {\n cacheContent(raw, extraction);\n } catch (err) {\n log.warn('failed to cache research source', { url: result.url, error: String(err) });\n }\n\n try {\n const embeddingService = getEmbeddingService();\n if (embeddingService.isAvailable()) {\n embeddingService.embedAsync(raw.finalUrl, extraction.markdown);\n }\n } catch (err) {\n log.debug('embedding hook skipped for research source', { error: String(err) });\n }\n\n return {\n url: result.url,\n title: extraction.title || result.title,\n markdown_content: truncated,\n relevance_score: result.relevance_score,\n fetched: true,\n };\n } catch (err) {\n log.debug('failed to fetch research source', {\n url: result.url,\n error: err instanceof Error ? err.message : String(err),\n });\n return {\n url: result.url,\n title: result.title,\n markdown_content: result.snippet,\n relevance_score: result.relevance_score,\n fetched: false,\n fetch_error: err instanceof Error ? err.message : String(err),\n };\n }\n });\n\n return Promise.all(fetchPromises);\n}\n\n// Cap total returned markdown_content across sources in relevance order.\n// Later (lower-relevance) sources get trimmed further when budget runs low;\n// any source past the cap is set to empty content (caller still sees url/title).\nfunction applySourceBudget(\n sources: ResearchSource[],\n perSourceCap: number,\n totalCap: number,\n): void {\n let used = 0;\n for (const s of sources) {\n if (!s.markdown_content) continue;\n if (used >= totalCap) {\n s.markdown_content = '';\n continue;\n }\n const remaining = totalCap - used;\n const cap = Math.min(perSourceCap, remaining);\n if (s.markdown_content.length > cap) {\n s.markdown_content = truncateSmartly(s.markdown_content, cap);\n }\n used += s.markdown_content.length;\n }\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,mBAAoC,iCAAiD;AAC9F,SAAS,wBAAwB;AACjC,SAAS,uBAAuB;AAChC,SAAS,0BAA0B;AACnC,SAAS,0BAA0B;AACnC,SAAS,qBAAqB;AAC9B,SAAS,uBAAuB;AAChC,SAAS,yBAAyB;AAElC,SAAS,0BAA0B;AACnC,SAAS,uBAAuB;AAChC,SAAS,oBAAoB;AAC7B,SAAS,2BAA2B;AACpC,SAAS,4BAAwD;AACjE,SAAS,yBAAyB;AAUlC,MAAM,MAAM,aAAa,UAAU;AAEnC,MAAM,eAA+F;AAAA,EACnG,OAAO,EAAE,YAAY,GAAG,YAAY,GAAG,YAAY,EAAE;AAAA,EACrD,UAAU,EAAE,YAAY,GAAG,YAAY,IAAI,YAAY,GAAG;AAAA,EAC1D,eAAe,EAAE,YAAY,GAAG,YAAY,IAAI,YAAY,GAAG;AACjE;AAKA,MAAM,yBAAiD;AAAA,EACrD,OAAO;AAAA,EACP,UAAU;AAAA,EACV,eAAe;AACjB;AACA,MAAM,6BAAqD;AAAA,EACzD,OAAO;AAAA,EACP,UAAU;AAAA,EACV,eAAe;AACjB;AAEA,MAAM,sBAAsB;AAC5B,MAAM,yBAAyB;AAE/B,eAAsB,oBACpB,OACA,SACA,QACA,QACyB;AACzB,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,QAAQ,MAAM,SAAS;AAC7B,QAAM,SAAS,aAAa,KAAK,KAAK,aAAa;AACnD,QAAM,aAAa,MAAM,eAAe,OAAO;AAE/C,MAAI;AAEF,QAAI,KAAK,6BAA6B,EAAE,UAAU,MAAM,UAAU,MAAM,CAAC;AACzE,UAAM,kBAAkB,MAAM;AAAA,MAC5B,MAAM;AAAA,MACN;AAAA,MACA;AAAA,IACF;AACA,UAAM,aAAa,gBAAgB;AACnC,UAAM,YAAY,gBAAgB;AAClC,QAAI,KAAK,0BAA0B,EAAE,eAAe,WAAW,QAAQ,cAAc,gBAAgB,cAAc,UAAU,CAAC;AAO9H,UAAM,aAAa,WAAW,UAAU,KAAK,QAAQ,SAAS,IAAI,QAAQ,MAAM,GAAG,CAAC,IAAI;AACxF,UAAM,sBAAsB,KAAK,KAAK,aAAa,WAAW,MAAM,IAAI;AAExE,UAAM,gBAAgB,MAAM;AAAA,MAC1B;AAAA,MACA,OAAO,UAAU,WAAW;AAC1B,cAAM,UAA6B,CAAC;AACpC,cAAM,WAAW,oBAAI,IAAY;AACjC,cAAM,aAAkC;AAAA,UACtC,YAAY;AAAA,UACZ,gBAAgB,MAAM;AAAA,UACtB,gBAAgB,MAAM;AAAA,QACxB;AAEA,cAAM,QAAQ;AAAA,UACZ,WAAW,IAAI,OAAO,WAAW;AAC/B,gBAAI,OAAO,QAAS;AACpB,gBAAI;AACF,oBAAM,KAAK,MAAM,OAAO,OAAO,UAAU,UAAU;AACnD,yBAAW,KAAK,GAAI,SAAQ,KAAK,CAAC;AAClC,uBAAS,IAAI,OAAO,IAAI;AAAA,YAC1B,SAAS,KAAK;AACZ,kBAAI,KAAK,iCAAiC;AAAA,gBACxC,QAAQ,OAAO;AAAA,gBACf,OAAO;AAAA,gBACP,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,cACxD,CAAC;AAAA,YACH;AAAA,UACF,CAAC;AAAA,QACH;AAEA,eAAO,EAAE,SAAS,aAAa,CAAC,GAAG,QAAQ,EAAE;AAAA,MAC/C;AAAA,MACA;AAAA,QACE,eAAe;AAAA,QACf,eAAe,uBAAuB,KAAK,KAAK,uBAAuB;AAAA,QACvE,kBAAkB,2BAA2B,KAAK,KAAK,2BAA2B;AAAA,MACpF;AAAA,IACF;AAEA,UAAM,SAA4B,CAAC;AACnC,UAAM,cAAc,oBAAI,IAAY;AACpC,UAAM,eAAyB,CAAC;AAChC,eAAW,MAAM,eAAe;AAC9B,UAAI,GAAG,MAAM,GAAG,QAAQ;AACtB,eAAO,KAAK,GAAG,GAAG,OAAO,OAAO;AAChC,mBAAW,KAAK,GAAG,OAAO,YAAa,aAAY,IAAI,CAAC;AAAA,MAC1D,WAAW,GAAG,OAAO;AACnB,qBAAa,KAAK,GAAG,GAAG,KAAK,KAAK,GAAG,KAAK,EAAE;AAAA,MAC9C;AAAA,IACF;AACA,QAAI,aAAa,SAAS,GAAG;AAC3B,UAAI,KAAK,kCAAkC,EAAE,QAAQ,aAAa,CAAC;AAAA,IACrE;AAEA,QAAI,KAAK,yBAAyB,EAAE,UAAU,OAAO,QAAQ,SAAS,CAAC,GAAG,WAAW,EAAE,CAAC;AAGxF,QAAI,SAAS,mBAAmB,MAAM;AAEtC,aAAS,gBAAgB,QAAQ;AAAA,MAC/B,gBAAgB,MAAM;AAAA,MACtB,gBAAgB,MAAM;AAAA,IACxB,CAAC;AAED,aAAS,MAAM,cAAc,MAAM,UAAU,MAAM;AACnD,aAAS,OAAO,MAAM,GAAG,UAAU;AAEnC,QAAI,OAAO,WAAW,GAAG;AACvB,aAAO;AAAA,QACL,QAAQ,gBAAgB,MAAM,QAAQ;AAAA;AAAA;AAAA,QACtC,WAAW,CAAC;AAAA,QACZ,SAAS,CAAC;AAAA,QACV,aAAa;AAAA,QACb;AAAA,QACA,eAAe,KAAK,IAAI,IAAI;AAAA,QAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC7D;AAAA,IACF;AAGA,UAAM,UAA4B,MAAM,aAAa,QAAQ,QAAQ,UAAU;AAC/E,sBAAkB,SAAS,qBAAqB,sBAAsB;AACtE,QAAI,KAAK,wBAAwB;AAAA,MAC/B,SAAS,QAAQ,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE;AAAA,MAC1C,QAAQ,QAAQ,OAAO,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE;AAAA,IAC5C,CAAC;AAGD,UAAM,kBAAkB,MAAM;AAAA,MAC5B,MAAM;AAAA,MACN;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,QAAI,KAAK,sBAAsB,EAAE,cAAc,gBAAgB,cAAc,cAAc,gBAAgB,OAAO,OAAO,CAAC;AAK1H,QAAI,cAAc,gBAAgB;AAClC,QAAI,iBAA6B,gBAAgB;AACjD,QAAI;AACJ,QAAI,CAAC,gBAAgB,gBAAgB,kBAAkB,GAAG;AACxD,UAAI;AACF,cAAM,eAAe,QAClB,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC,EACxD,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,UAAU,EAAE,iBAAiB,EAAE;AAC5E,YAAI,aAAa,SAAS,GAAG;AAC3B,gBAAM,QAAQ,MAAM,gBAAgB,MAAM,UAAU,YAAY;AAChE,wBAAc,MAAM;AACpB,+BAAqB,MAAM;AAC3B,2BAAiB,MAAM,UACpB,OAAO,CAAC,QAAQ,OAAO,KAAK,MAAM,aAAa,MAAM,EACrD,IAAI,CAAC,QAAQ;AACZ,kBAAM,IAAI,aAAa,GAAG;AAC1B,mBAAO;AAAA,cACL,OAAO,MAAM;AAAA,cACb,KAAK,EAAE;AAAA,cACP,OAAO,EAAE;AAAA,cACT,SAAS,EAAE,SAAS,MAAM,GAAG,GAAG;AAAA,YAClC;AAAA,UACF,CAAC;AACH,cAAI,KAAK,6BAA6B,EAAE,cAAc,YAAY,OAAO,CAAC;AAAA,QAC5E;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,KAAK,wDAAwD;AAAA,UAC/D,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QACxD,CAAC;AAAA,MACH;AAAA,IACF;AAKA,UAAM,qBAAqB,cAAc,eACrC,0BAA0B,MAAM,QAAQ,EAAE,WAC1C,CAAC;AACL,UAAM,QAAQ,CAAC,gBAAgB,eAC3B,MAAM;AAAA,MACJ,MAAM;AAAA,MACN;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IACA;AAEJ,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA,aAAa;AAAA,MACb;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC3D,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,IAC3B;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,MAAM,4BAA4B;AAAA,MACpC,UAAU,MAAM;AAAA,MAChB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,WAAW,CAAC;AAAA,MACZ,SAAS,CAAC;AAAA,MACV,aAAa,CAAC;AAAA,MACd;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC3D,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAUA,eAAe,aACb,QACA,QACA,YAC2B;AAC3B,QAAM,gBAAgB,OAAO,MAAM,GAAG,UAAU,EAAE,IAAI,OAAO,WAAoC;AAC/F,QAAI;AACF,YAAM,MAAM,MAAM,QAAQ,KAAK;AAAA,QAC7B,OAAO,MAAM,OAAO,KAAK,EAAE,UAAU,OAAO,CAAC;AAAA,QAC7C,IAAI;AAAA,UAAe,CAAC,GAAG,WACrB,WAAW,MAAM,OAAO,IAAI,MAAM,eAAe,CAAC,GAAG,IAAK;AAAA,QAC5D;AAAA,MACF,CAAC;AAED,YAAM,YAAY,MAAM,mBAAmB;AAC3C,YAAM,aAAa,MAAM,UAAU,QAAQ,IAAI,MAAM,IAAI,UAAU;AAAA,QACjE,UAAU;AAAA,QACV,aAAa,IAAI;AAAA,MACnB,CAAC;AACD,YAAM,YAAY,gBAAgB,WAAW,UAAU,mBAAmB;AAE1E,UAAI;AACF,qBAAa,KAAK,UAAU;AAAA,MAC9B,SAAS,KAAK;AACZ,YAAI,KAAK,mCAAmC,EAAE,KAAK,OAAO,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,MACrF;AAEA,UAAI;AACF,cAAM,mBAAmB,oBAAoB;AAC7C,YAAI,iBAAiB,YAAY,GAAG;AAClC,2BAAiB,WAAW,IAAI,UAAU,WAAW,QAAQ;AAAA,QAC/D;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,MAAM,8CAA8C,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,MAChF;AAEA,aAAO;AAAA,QACL,KAAK,OAAO;AAAA,QACZ,OAAO,WAAW,SAAS,OAAO;AAAA,QAClC,kBAAkB;AAAA,QAClB,iBAAiB,OAAO;AAAA,QACxB,SAAS;AAAA,MACX;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,MAAM,mCAAmC;AAAA,QAC3C,KAAK,OAAO;AAAA,QACZ,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACxD,CAAC;AACD,aAAO;AAAA,QACL,KAAK,OAAO;AAAA,QACZ,OAAO,OAAO;AAAA,QACd,kBAAkB,OAAO;AAAA,QACzB,iBAAiB,OAAO;AAAA,QACxB,SAAS;AAAA,QACT,aAAa,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MAC9D;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO,QAAQ,IAAI,aAAa;AAClC;AAKA,SAAS,kBACP,SACA,cACA,UACM;AACN,MAAI,OAAO;AACX,aAAW,KAAK,SAAS;AACvB,QAAI,CAAC,EAAE,iBAAkB;AACzB,QAAI,QAAQ,UAAU;AACpB,QAAE,mBAAmB;AACrB;AAAA,IACF;AACA,UAAM,YAAY,WAAW;AAC7B,UAAM,MAAM,KAAK,IAAI,cAAc,SAAS;AAC5C,QAAI,EAAE,iBAAiB,SAAS,KAAK;AACnC,QAAE,mBAAmB,gBAAgB,EAAE,kBAAkB,GAAG;AAAA,IAC9D;AACA,YAAQ,EAAE,iBAAiB;AAAA,EAC7B;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/research/pipeline.ts"],"sourcesContent":["import { createLogger } from '../logger.js';\nimport { decomposeQuestion, detectQueryType, extractComparisonEntities, type QueryType } from './decompose.js';\nimport { synthesizeReport } from './synthesize.js';\nimport { synthesizeLocal } from './synthesis-local.js';\nimport { buildResearchBrief } from './brief.js';\nimport { deduplicateResults } from '../search/dedup.js';\nimport { rerankResults } from '../search/rerank.js';\nimport { applyAllFilters } from '../search/filters.js';\nimport { exploreInParallel } from './branch-exploration.js';\nimport type { RawSearchResult, SearchEngineOptions } from '../types.js';\nimport { getExtractProvider } from '../providers/extract-provider.js';\nimport { truncateSmartly } from '../search/truncate.js';\nimport { cacheContent } from '../cache/store.js';\nimport { getEmbeddingService } from '../embedding/embed.js';\nimport { checkSamplingSupport, type SamplingCapableServer } from '../search/sampling.js';\nimport { isLlmConfigured as isLocalLlmEnabled } from '../integrations/cloud/llm/run.js';\nimport type {\n ResearchInput,\n ResearchOutput,\n ResearchSource,\n SearchEngine,\n Citation,\n} from '../types.js';\nimport type { SmartRouter } from '../fetch/router.js';\n\nconst log = createLogger('research');\n\nconst DEPTH_CONFIG: Record<string, { subQueries: number; minSources: number; maxSources: number }> = {\n quick: { subQueries: 2, minSources: 5, maxSources: 8 },\n standard: { subQueries: 4, minSources: 10, maxSources: 15 },\n comprehensive: { subQueries: 7, minSources: 20, maxSources: 25 },\n};\n\n// Per-depth budgets for the sub-query fan-out. exploreInParallel guarantees\n// a single slow sub-query can't burn the whole research budget — comprehensive\n// runs cap at ~60s total and 15s per sub-query.\nconst SEARCH_TOTAL_BUDGET_MS: Record<string, number> = {\n quick: 15_000,\n standard: 30_000,\n comprehensive: 60_000,\n};\nconst SEARCH_PER_QUERY_BUDGET_MS: Record<string, number> = {\n quick: 8_000,\n standard: 10_000,\n comprehensive: 15_000,\n};\n\nconst PER_SOURCE_CHAR_CAP = 3000;\nconst TOTAL_SOURCES_CHAR_CAP = 40000;\n\nexport async function runResearchPipeline(\n input: ResearchInput,\n engines: SearchEngine[],\n router: SmartRouter,\n server?: SamplingCapableServer,\n): Promise<ResearchOutput> {\n const start = Date.now();\n const depth = input.depth ?? 'standard';\n const config = DEPTH_CONFIG[depth] ?? DEPTH_CONFIG.standard;\n const maxSources = input.max_sources ?? config.maxSources;\n\n try {\n // Phase 1: Decompose question into sub-queries\n log.info('research pipeline started', { question: input.question, depth });\n const decomposeResult = await decomposeQuestion(\n input.question,\n depth as 'quick' | 'standard' | 'comprehensive',\n server,\n );\n const subQueries = decomposeResult.subQueries;\n const queryType = decomposeResult.queryType;\n log.info('decomposition complete', { subQueryCount: subQueries.length, samplingUsed: decomposeResult.samplingUsed, queryType });\n\n // Phase 2: Parallel search across sub-queries with per-query + total\n // budget enforcement via exploreInParallel. A single hung engine no\n // longer wedges the whole research call — the per-query timer aborts\n // it and the rest of the fan-out keeps going. Engine cap when\n // sub-queries are many preserves the multi-query.ts invariant.\n const effEngines = subQueries.length >= 3 && engines.length > 2 ? engines.slice(0, 2) : engines;\n const perEngineMaxResults = Math.ceil(maxSources / subQueries.length) * 2;\n\n const branchResults = await exploreInParallel(\n subQueries,\n async (subQuery, signal) => {\n const results: RawSearchResult[] = [];\n const usedHere = new Set<string>();\n const engineOpts: SearchEngineOptions = {\n maxResults: perEngineMaxResults,\n includeDomains: input.include_domains,\n excludeDomains: input.exclude_domains,\n };\n\n await Promise.allSettled(\n effEngines.map(async (engine) => {\n if (signal.aborted) return;\n try {\n const rs = await engine.search(subQuery, engineOpts);\n for (const r of rs) results.push(r);\n usedHere.add(engine.name);\n } catch (err) {\n log.warn('research engine search failed', {\n engine: engine.name,\n query: subQuery,\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }),\n );\n\n return { results, enginesUsed: [...usedHere] };\n },\n {\n maxConcurrent: 3,\n totalBudgetMs: SEARCH_TOTAL_BUDGET_MS[depth] ?? SEARCH_TOTAL_BUDGET_MS.standard,\n perQueryBudgetMs: SEARCH_PER_QUERY_BUDGET_MS[depth] ?? SEARCH_PER_QUERY_BUDGET_MS.standard,\n },\n );\n\n const allRaw: RawSearchResult[] = [];\n const enginesUsed = new Set<string>();\n const searchErrors: string[] = [];\n for (const br of branchResults) {\n if (br.ok && br.result) {\n allRaw.push(...br.result.results);\n for (const e of br.result.enginesUsed) enginesUsed.add(e);\n } else if (br.error) {\n searchErrors.push(`${br.query}: ${br.error}`);\n }\n }\n if (searchErrors.length > 0) {\n log.warn('some search sub-queries failed', { errors: searchErrors });\n }\n\n log.info('search phase complete', { totalRaw: allRaw.length, engines: [...enginesUsed] });\n\n // Phase 3: Deduplicate, filter, rerank\n let merged = deduplicateResults(allRaw);\n\n merged = applyAllFilters(merged, {\n includeDomains: input.include_domains,\n excludeDomains: input.exclude_domains,\n });\n\n merged = await rerankResults(input.question, merged);\n merged = merged.slice(0, maxSources);\n\n if (merged.length === 0) {\n return {\n report: `## Research: ${input.question}\\n\\nNo sources could be found for this query.`,\n citations: [],\n sources: [],\n sub_queries: subQueries,\n depth,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n };\n }\n\n // Phase 4: Fetch top sources in parallel\n const sources: ResearchSource[] = await fetchSources(merged, router, maxSources);\n applySourceBudget(sources, PER_SOURCE_CHAR_CAP, TOTAL_SOURCES_CHAR_CAP);\n log.info('fetch phase complete', {\n fetched: sources.filter((s) => s.fetched).length,\n failed: sources.filter((s) => !s.fetched).length,\n });\n\n // Phase 5: Synthesize report\n const synthesisResult = await synthesizeReport(\n input.question,\n sources,\n depth as 'quick' | 'standard' | 'comprehensive',\n server,\n );\n log.info('synthesis complete', { samplingUsed: synthesisResult.samplingUsed, reportLength: synthesisResult.report.length });\n\n // Phase 5b: Local-LLM synthesis fallback — only when host LLM did not\n // produce output AND a local provider is configured. Failures fall through\n // to the existing heuristic report in synthesisResult.\n let finalReport = synthesisResult.report;\n let finalCitations: Citation[] = synthesisResult.citations;\n let localSynthesisText: string | undefined;\n if (!synthesisResult.samplingUsed && isLocalLlmEnabled()) {\n try {\n const localSources = sources\n .filter((s) => s.fetched && s.markdown_content.length > 0)\n .map((s) => ({ url: s.url, title: s.title, markdown: s.markdown_content }));\n if (localSources.length > 0) {\n const local = await synthesizeLocal(input.question, localSources);\n finalReport = local.text;\n localSynthesisText = local.text;\n finalCitations = local.citations\n .filter((idx) => idx >= 0 && idx < localSources.length)\n .map((idx) => {\n const s = localSources[idx];\n return {\n index: idx + 1,\n url: s.url,\n title: s.title,\n snippet: s.markdown.slice(0, 200),\n };\n });\n log.info('local synthesis succeeded', { reportLength: finalReport.length });\n }\n } catch (err) {\n log.warn('local LLM synthesis failed; using heuristic fallback', {\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n // Phase 6: Structured brief — populated when internal sampling is\n // unavailable so the host LLM has well-shaped data to write the report\n // from without re-reading raw markdown.\n const comparisonEntities = queryType === 'comparison'\n ? extractComparisonEntities(input.question).entities\n : [];\n const brief = !synthesisResult.samplingUsed\n ? await buildResearchBrief(\n input.question,\n sources,\n subQueries,\n PER_SOURCE_CHAR_CAP,\n TOTAL_SOURCES_CHAR_CAP,\n queryType,\n comparisonEntities,\n localSynthesisText,\n )\n : undefined;\n\n return {\n report: finalReport,\n citations: finalCitations,\n sources,\n sub_queries: subQueries,\n depth,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n ...(brief ? { brief } : {}),\n };\n } catch (err) {\n log.error('research pipeline failed', {\n question: input.question,\n error: err instanceof Error ? err.message : String(err),\n });\n return {\n report: '',\n citations: [],\n sources: [],\n sub_queries: [],\n depth,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n\ninterface MergedResult {\n title: string;\n url: string;\n snippet: string;\n relevance_score: number;\n engines: string[];\n}\n\nasync function fetchSources(\n merged: MergedResult[],\n router: SmartRouter,\n maxSources: number,\n): Promise<ResearchSource[]> {\n const fetchPromises = merged.slice(0, maxSources).map(async (result): Promise<ResearchSource> => {\n try {\n const raw = await Promise.race([\n router.fetch(result.url, { renderJs: 'auto' }),\n new Promise<never>((_, reject) =>\n setTimeout(() => reject(new Error('fetch timeout')), 15000),\n ),\n ]);\n\n const extractor = await getExtractProvider();\n const extraction = await extractor.extract(raw.html, raw.finalUrl, {\n maxChars: 30000,\n contentType: raw.contentType,\n });\n const truncated = truncateSmartly(extraction.markdown, PER_SOURCE_CHAR_CAP);\n\n try {\n cacheContent(raw, extraction);\n } catch (err) {\n log.warn('failed to cache research source', { url: result.url, error: String(err) });\n }\n\n try {\n const embeddingService = getEmbeddingService();\n if (embeddingService.isAvailable()) {\n embeddingService.embedAsync(raw.finalUrl, extraction.markdown);\n }\n } catch (err) {\n log.debug('embedding hook skipped for research source', { error: String(err) });\n }\n\n return {\n url: result.url,\n title: extraction.title || result.title,\n markdown_content: truncated,\n relevance_score: result.relevance_score,\n fetched: true,\n };\n } catch (err) {\n log.debug('failed to fetch research source', {\n url: result.url,\n error: err instanceof Error ? err.message : String(err),\n });\n return {\n url: result.url,\n title: result.title,\n markdown_content: result.snippet,\n relevance_score: result.relevance_score,\n fetched: false,\n fetch_error: err instanceof Error ? err.message : String(err),\n };\n }\n });\n\n return Promise.all(fetchPromises);\n}\n\n// Cap total returned markdown_content across sources in relevance order.\n// Later (lower-relevance) sources get trimmed further when budget runs low;\n// any source past the cap is set to empty content (caller still sees url/title).\nfunction applySourceBudget(\n sources: ResearchSource[],\n perSourceCap: number,\n totalCap: number,\n): void {\n let used = 0;\n for (const s of sources) {\n if (!s.markdown_content) continue;\n if (used >= totalCap) {\n s.markdown_content = '';\n continue;\n }\n const remaining = totalCap - used;\n const cap = Math.min(perSourceCap, remaining);\n if (s.markdown_content.length > cap) {\n s.markdown_content = truncateSmartly(s.markdown_content, cap);\n }\n used += s.markdown_content.length;\n }\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,mBAAoC,iCAAiD;AAC9F,SAAS,wBAAwB;AACjC,SAAS,uBAAuB;AAChC,SAAS,0BAA0B;AACnC,SAAS,0BAA0B;AACnC,SAAS,qBAAqB;AAC9B,SAAS,uBAAuB;AAChC,SAAS,yBAAyB;AAElC,SAAS,0BAA0B;AACnC,SAAS,uBAAuB;AAChC,SAAS,oBAAoB;AAC7B,SAAS,2BAA2B;AACpC,SAAS,4BAAwD;AACjE,SAAS,mBAAmB,yBAAyB;AAUrD,MAAM,MAAM,aAAa,UAAU;AAEnC,MAAM,eAA+F;AAAA,EACnG,OAAO,EAAE,YAAY,GAAG,YAAY,GAAG,YAAY,EAAE;AAAA,EACrD,UAAU,EAAE,YAAY,GAAG,YAAY,IAAI,YAAY,GAAG;AAAA,EAC1D,eAAe,EAAE,YAAY,GAAG,YAAY,IAAI,YAAY,GAAG;AACjE;AAKA,MAAM,yBAAiD;AAAA,EACrD,OAAO;AAAA,EACP,UAAU;AAAA,EACV,eAAe;AACjB;AACA,MAAM,6BAAqD;AAAA,EACzD,OAAO;AAAA,EACP,UAAU;AAAA,EACV,eAAe;AACjB;AAEA,MAAM,sBAAsB;AAC5B,MAAM,yBAAyB;AAE/B,eAAsB,oBACpB,OACA,SACA,QACA,QACyB;AACzB,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,QAAQ,MAAM,SAAS;AAC7B,QAAM,SAAS,aAAa,KAAK,KAAK,aAAa;AACnD,QAAM,aAAa,MAAM,eAAe,OAAO;AAE/C,MAAI;AAEF,QAAI,KAAK,6BAA6B,EAAE,UAAU,MAAM,UAAU,MAAM,CAAC;AACzE,UAAM,kBAAkB,MAAM;AAAA,MAC5B,MAAM;AAAA,MACN;AAAA,MACA;AAAA,IACF;AACA,UAAM,aAAa,gBAAgB;AACnC,UAAM,YAAY,gBAAgB;AAClC,QAAI,KAAK,0BAA0B,EAAE,eAAe,WAAW,QAAQ,cAAc,gBAAgB,cAAc,UAAU,CAAC;AAO9H,UAAM,aAAa,WAAW,UAAU,KAAK,QAAQ,SAAS,IAAI,QAAQ,MAAM,GAAG,CAAC,IAAI;AACxF,UAAM,sBAAsB,KAAK,KAAK,aAAa,WAAW,MAAM,IAAI;AAExE,UAAM,gBAAgB,MAAM;AAAA,MAC1B;AAAA,MACA,OAAO,UAAU,WAAW;AAC1B,cAAM,UAA6B,CAAC;AACpC,cAAM,WAAW,oBAAI,IAAY;AACjC,cAAM,aAAkC;AAAA,UACtC,YAAY;AAAA,UACZ,gBAAgB,MAAM;AAAA,UACtB,gBAAgB,MAAM;AAAA,QACxB;AAEA,cAAM,QAAQ;AAAA,UACZ,WAAW,IAAI,OAAO,WAAW;AAC/B,gBAAI,OAAO,QAAS;AACpB,gBAAI;AACF,oBAAM,KAAK,MAAM,OAAO,OAAO,UAAU,UAAU;AACnD,yBAAW,KAAK,GAAI,SAAQ,KAAK,CAAC;AAClC,uBAAS,IAAI,OAAO,IAAI;AAAA,YAC1B,SAAS,KAAK;AACZ,kBAAI,KAAK,iCAAiC;AAAA,gBACxC,QAAQ,OAAO;AAAA,gBACf,OAAO;AAAA,gBACP,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,cACxD,CAAC;AAAA,YACH;AAAA,UACF,CAAC;AAAA,QACH;AAEA,eAAO,EAAE,SAAS,aAAa,CAAC,GAAG,QAAQ,EAAE;AAAA,MAC/C;AAAA,MACA;AAAA,QACE,eAAe;AAAA,QACf,eAAe,uBAAuB,KAAK,KAAK,uBAAuB;AAAA,QACvE,kBAAkB,2BAA2B,KAAK,KAAK,2BAA2B;AAAA,MACpF;AAAA,IACF;AAEA,UAAM,SAA4B,CAAC;AACnC,UAAM,cAAc,oBAAI,IAAY;AACpC,UAAM,eAAyB,CAAC;AAChC,eAAW,MAAM,eAAe;AAC9B,UAAI,GAAG,MAAM,GAAG,QAAQ;AACtB,eAAO,KAAK,GAAG,GAAG,OAAO,OAAO;AAChC,mBAAW,KAAK,GAAG,OAAO,YAAa,aAAY,IAAI,CAAC;AAAA,MAC1D,WAAW,GAAG,OAAO;AACnB,qBAAa,KAAK,GAAG,GAAG,KAAK,KAAK,GAAG,KAAK,EAAE;AAAA,MAC9C;AAAA,IACF;AACA,QAAI,aAAa,SAAS,GAAG;AAC3B,UAAI,KAAK,kCAAkC,EAAE,QAAQ,aAAa,CAAC;AAAA,IACrE;AAEA,QAAI,KAAK,yBAAyB,EAAE,UAAU,OAAO,QAAQ,SAAS,CAAC,GAAG,WAAW,EAAE,CAAC;AAGxF,QAAI,SAAS,mBAAmB,MAAM;AAEtC,aAAS,gBAAgB,QAAQ;AAAA,MAC/B,gBAAgB,MAAM;AAAA,MACtB,gBAAgB,MAAM;AAAA,IACxB,CAAC;AAED,aAAS,MAAM,cAAc,MAAM,UAAU,MAAM;AACnD,aAAS,OAAO,MAAM,GAAG,UAAU;AAEnC,QAAI,OAAO,WAAW,GAAG;AACvB,aAAO;AAAA,QACL,QAAQ,gBAAgB,MAAM,QAAQ;AAAA;AAAA;AAAA,QACtC,WAAW,CAAC;AAAA,QACZ,SAAS,CAAC;AAAA,QACV,aAAa;AAAA,QACb;AAAA,QACA,eAAe,KAAK,IAAI,IAAI;AAAA,QAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC7D;AAAA,IACF;AAGA,UAAM,UAA4B,MAAM,aAAa,QAAQ,QAAQ,UAAU;AAC/E,sBAAkB,SAAS,qBAAqB,sBAAsB;AACtE,QAAI,KAAK,wBAAwB;AAAA,MAC/B,SAAS,QAAQ,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE;AAAA,MAC1C,QAAQ,QAAQ,OAAO,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE;AAAA,IAC5C,CAAC;AAGD,UAAM,kBAAkB,MAAM;AAAA,MAC5B,MAAM;AAAA,MACN;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,QAAI,KAAK,sBAAsB,EAAE,cAAc,gBAAgB,cAAc,cAAc,gBAAgB,OAAO,OAAO,CAAC;AAK1H,QAAI,cAAc,gBAAgB;AAClC,QAAI,iBAA6B,gBAAgB;AACjD,QAAI;AACJ,QAAI,CAAC,gBAAgB,gBAAgB,kBAAkB,GAAG;AACxD,UAAI;AACF,cAAM,eAAe,QAClB,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC,EACxD,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,UAAU,EAAE,iBAAiB,EAAE;AAC5E,YAAI,aAAa,SAAS,GAAG;AAC3B,gBAAM,QAAQ,MAAM,gBAAgB,MAAM,UAAU,YAAY;AAChE,wBAAc,MAAM;AACpB,+BAAqB,MAAM;AAC3B,2BAAiB,MAAM,UACpB,OAAO,CAAC,QAAQ,OAAO,KAAK,MAAM,aAAa,MAAM,EACrD,IAAI,CAAC,QAAQ;AACZ,kBAAM,IAAI,aAAa,GAAG;AAC1B,mBAAO;AAAA,cACL,OAAO,MAAM;AAAA,cACb,KAAK,EAAE;AAAA,cACP,OAAO,EAAE;AAAA,cACT,SAAS,EAAE,SAAS,MAAM,GAAG,GAAG;AAAA,YAClC;AAAA,UACF,CAAC;AACH,cAAI,KAAK,6BAA6B,EAAE,cAAc,YAAY,OAAO,CAAC;AAAA,QAC5E;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,KAAK,wDAAwD;AAAA,UAC/D,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QACxD,CAAC;AAAA,MACH;AAAA,IACF;AAKA,UAAM,qBAAqB,cAAc,eACrC,0BAA0B,MAAM,QAAQ,EAAE,WAC1C,CAAC;AACL,UAAM,QAAQ,CAAC,gBAAgB,eAC3B,MAAM;AAAA,MACJ,MAAM;AAAA,MACN;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IACA;AAEJ,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA,aAAa;AAAA,MACb;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC3D,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,IAC3B;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,MAAM,4BAA4B;AAAA,MACpC,UAAU,MAAM;AAAA,MAChB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,WAAW,CAAC;AAAA,MACZ,SAAS,CAAC;AAAA,MACV,aAAa,CAAC;AAAA,MACd;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC3D,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAUA,eAAe,aACb,QACA,QACA,YAC2B;AAC3B,QAAM,gBAAgB,OAAO,MAAM,GAAG,UAAU,EAAE,IAAI,OAAO,WAAoC;AAC/F,QAAI;AACF,YAAM,MAAM,MAAM,QAAQ,KAAK;AAAA,QAC7B,OAAO,MAAM,OAAO,KAAK,EAAE,UAAU,OAAO,CAAC;AAAA,QAC7C,IAAI;AAAA,UAAe,CAAC,GAAG,WACrB,WAAW,MAAM,OAAO,IAAI,MAAM,eAAe,CAAC,GAAG,IAAK;AAAA,QAC5D;AAAA,MACF,CAAC;AAED,YAAM,YAAY,MAAM,mBAAmB;AAC3C,YAAM,aAAa,MAAM,UAAU,QAAQ,IAAI,MAAM,IAAI,UAAU;AAAA,QACjE,UAAU;AAAA,QACV,aAAa,IAAI;AAAA,MACnB,CAAC;AACD,YAAM,YAAY,gBAAgB,WAAW,UAAU,mBAAmB;AAE1E,UAAI;AACF,qBAAa,KAAK,UAAU;AAAA,MAC9B,SAAS,KAAK;AACZ,YAAI,KAAK,mCAAmC,EAAE,KAAK,OAAO,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,MACrF;AAEA,UAAI;AACF,cAAM,mBAAmB,oBAAoB;AAC7C,YAAI,iBAAiB,YAAY,GAAG;AAClC,2BAAiB,WAAW,IAAI,UAAU,WAAW,QAAQ;AAAA,QAC/D;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,MAAM,8CAA8C,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,MAChF;AAEA,aAAO;AAAA,QACL,KAAK,OAAO;AAAA,QACZ,OAAO,WAAW,SAAS,OAAO;AAAA,QAClC,kBAAkB;AAAA,QAClB,iBAAiB,OAAO;AAAA,QACxB,SAAS;AAAA,MACX;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,MAAM,mCAAmC;AAAA,QAC3C,KAAK,OAAO;AAAA,QACZ,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACxD,CAAC;AACD,aAAO;AAAA,QACL,KAAK,OAAO;AAAA,QACZ,OAAO,OAAO;AAAA,QACd,kBAAkB,OAAO;AAAA,QACzB,iBAAiB,OAAO;AAAA,QACxB,SAAS;AAAA,QACT,aAAa,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MAC9D;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO,QAAQ,IAAI,aAAa;AAClC;AAKA,SAAS,kBACP,SACA,cACA,UACM;AACN,MAAI,OAAO;AACX,aAAW,KAAK,SAAS;AACvB,QAAI,CAAC,EAAE,iBAAkB;AACzB,QAAI,QAAQ,UAAU;AACpB,QAAE,mBAAmB;AACrB;AAAA,IACF;AACA,UAAM,YAAY,WAAW;AAC7B,UAAM,MAAM,KAAK,IAAI,cAAc,SAAS;AAC5C,QAAI,EAAE,iBAAiB,SAAS,KAAK;AACnC,QAAE,mBAAmB,gBAAgB,EAAE,kBAAkB,GAAG;AAAA,IAC9D;AACA,YAAQ,EAAE,iBAAiB;AAAA,EAC7B;AACF;","names":[]}
@@ -2,6 +2,8 @@ export interface LocalSynthesisOptions {
2
2
  maxSources?: number;
3
3
  maxCharsPerSource?: number;
4
4
  timeoutMs?: number;
5
+ maxTokens?: number;
6
+ modelOverride?: string;
5
7
  }
6
8
  export interface LocalSynthesisSource {
7
9
  url: string;
@@ -13,4 +15,5 @@ export interface LocalSynthesisResult {
13
15
  citations: number[];
14
16
  }
15
17
  export declare function synthesizeLocal(question: string, sources: LocalSynthesisSource[], opts?: LocalSynthesisOptions): Promise<LocalSynthesisResult>;
18
+ export declare function isLocalLlmEnabled(): boolean;
16
19
  //# sourceMappingURL=synthesis-local.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"synthesis-local.d.ts","sourceRoot":"","sources":["../../src/research/synthesis-local.ts"],"names":[],"mappings":"AASA,MAAM,WAAW,qBAAqB;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,oBAAoB;IACnC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,wBAAsB,eAAe,CACnC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,oBAAoB,EAAE,EAC/B,IAAI,GAAE,qBAA0B,GAC/B,OAAO,CAAC,oBAAoB,CAAC,CA2D/B"}
1
+ {"version":3,"file":"synthesis-local.d.ts","sourceRoot":"","sources":["../../src/research/synthesis-local.ts"],"names":[],"mappings":"AAUA,MAAM,WAAW,qBAAqB;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,oBAAoB;IACnC,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,wBAAsB,eAAe,CACnC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,oBAAoB,EAAE,EAC/B,IAAI,GAAE,qBAA0B,GAC/B,OAAO,CAAC,oBAAoB,CAAC,CAkC/B;AAID,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C"}
@@ -1,24 +1,21 @@
1
1
  import { createLogger } from "../logger.js";
2
- import { isLocalLlmEnabled } from "../extraction/v1/local-llm.js";
2
+ import { isLlmConfigured, runLlmText } from "../integrations/cloud/llm/run.js";
3
3
  const log = createLogger("research");
4
4
  const DEFAULT_MAX_SOURCES = 8;
5
5
  const DEFAULT_MAX_CHARS_PER_SOURCE = 4e3;
6
6
  const DEFAULT_TIMEOUT_MS = 6e4;
7
+ const DEFAULT_MAX_TOKENS = 3e3;
7
8
  async function synthesizeLocal(question, sources, opts = {}) {
8
- if (!isLocalLlmEnabled()) {
9
- throw new Error("Local LLM not configured. Set WIGOLO_LLM_PROVIDER.");
9
+ if (!isLlmConfigured()) {
10
+ throw new Error("LLM not configured. Set WIGOLO_LLM_PROVIDER or a provider API key.");
10
11
  }
11
12
  const maxSources = opts.maxSources ?? DEFAULT_MAX_SOURCES;
12
13
  const maxCharsPerSource = opts.maxCharsPerSource ?? DEFAULT_MAX_CHARS_PER_SOURCE;
13
- const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
14
- const provider = process.env["WIGOLO_LLM_PROVIDER"];
15
- const endpoint = provider.includes("/chat/completions") ? provider : provider.replace(/\/+$/, "") + "/v1/chat/completions";
16
- const model = process.env["WIGOLO_LLM_MODEL"] ?? "local";
17
14
  const sliced = sources.slice(0, maxSources);
18
15
  const sourceBlocks = sliced.map((s, i) => {
19
- const body2 = s.markdown.length > maxCharsPerSource ? s.markdown.slice(0, maxCharsPerSource) : s.markdown;
16
+ const body = s.markdown.length > maxCharsPerSource ? s.markdown.slice(0, maxCharsPerSource) : s.markdown;
20
17
  return `[${i + 1}] ${s.title}
21
- ${body2}`;
18
+ ${body}`;
22
19
  });
23
20
  const prompt = `You answer questions using ONLY the provided sources. Cite each fact with [N] where N is the source number.
24
21
 
@@ -26,31 +23,22 @@ Question: ${question}
26
23
 
27
24
  Sources:
28
25
  ${sourceBlocks.join("\n\n")}`;
29
- const body = {
30
- model,
31
- messages: [{ role: "user", content: prompt }]
32
- };
33
- let response;
34
26
  try {
35
- response = await fetch(endpoint, {
36
- method: "POST",
37
- headers: { "content-type": "application/json" },
38
- body: JSON.stringify(body),
39
- signal: AbortSignal.timeout(timeoutMs)
27
+ const result = await runLlmText({
28
+ prompt,
29
+ maxTokens: opts.maxTokens ?? DEFAULT_MAX_TOKENS,
30
+ modelOverride: opts.modelOverride,
31
+ timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS
40
32
  });
33
+ log.info("local synthesis ok", { provider: result.provider, model: result.model, latencyMs: result.latencyMs });
34
+ return { text: result.text, citations: extractCitations(result.text) };
41
35
  } catch (err) {
42
- log.error("local synthesis request failed", { error: String(err) });
36
+ log.error("local synthesis request failed", { error: err instanceof Error ? err.message : String(err) });
43
37
  throw err;
44
38
  }
45
- if (!response.ok) {
46
- throw new Error(`Local LLM endpoint returned ${response.status}`);
47
- }
48
- const payload = await response.json();
49
- const content = payload.choices?.[0]?.message?.content;
50
- if (typeof content !== "string") {
51
- throw new Error("Local LLM response missing message content");
52
- }
53
- return { text: content, citations: extractCitations(content) };
39
+ }
40
+ function isLocalLlmEnabled() {
41
+ return isLlmConfigured();
54
42
  }
55
43
  function extractCitations(text) {
56
44
  const matches = text.match(/\[(\d+)\]/g);
@@ -68,6 +56,7 @@ function extractCitations(text) {
68
56
  return out;
69
57
  }
70
58
  export {
59
+ isLocalLlmEnabled,
71
60
  synthesizeLocal
72
61
  };
73
62
  //# sourceMappingURL=synthesis-local.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/research/synthesis-local.ts"],"sourcesContent":["import { createLogger } from '../logger.js';\nimport { isLocalLlmEnabled } from '../extraction/v1/local-llm.js';\n\nconst log = createLogger('research');\n\nconst DEFAULT_MAX_SOURCES = 8;\nconst DEFAULT_MAX_CHARS_PER_SOURCE = 4000;\nconst DEFAULT_TIMEOUT_MS = 60_000;\n\nexport interface LocalSynthesisOptions {\n maxSources?: number;\n maxCharsPerSource?: number;\n timeoutMs?: number;\n}\n\nexport interface LocalSynthesisSource {\n url: string;\n title: string;\n markdown: string;\n}\n\nexport interface LocalSynthesisResult {\n text: string;\n citations: number[];\n}\n\nexport async function synthesizeLocal(\n question: string,\n sources: LocalSynthesisSource[],\n opts: LocalSynthesisOptions = {},\n): Promise<LocalSynthesisResult> {\n if (!isLocalLlmEnabled()) {\n throw new Error('Local LLM not configured. Set WIGOLO_LLM_PROVIDER.');\n }\n\n const maxSources = opts.maxSources ?? DEFAULT_MAX_SOURCES;\n const maxCharsPerSource = opts.maxCharsPerSource ?? DEFAULT_MAX_CHARS_PER_SOURCE;\n const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;\n\n const provider = process.env['WIGOLO_LLM_PROVIDER']!;\n const endpoint = provider.includes('/chat/completions')\n ? provider\n : provider.replace(/\\/+$/, '') + '/v1/chat/completions';\n const model = process.env['WIGOLO_LLM_MODEL'] ?? 'local';\n\n const sliced = sources.slice(0, maxSources);\n const sourceBlocks = sliced.map((s, i) => {\n const body = s.markdown.length > maxCharsPerSource\n ? s.markdown.slice(0, maxCharsPerSource)\n : s.markdown;\n return `[${i + 1}] ${s.title}\\n${body}`;\n });\n\n const prompt =\n 'You answer questions using ONLY the provided sources. Cite each fact with [N] where N is the source number.\\n\\n' +\n `Question: ${question}\\n\\n` +\n `Sources:\\n${sourceBlocks.join('\\n\\n')}`;\n\n const body = {\n model,\n messages: [{ role: 'user', content: prompt }],\n };\n\n let response: Response;\n try {\n response = await fetch(endpoint, {\n method: 'POST',\n headers: { 'content-type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(timeoutMs),\n });\n } catch (err) {\n log.error('local synthesis request failed', { error: String(err) });\n throw err;\n }\n\n if (!response.ok) {\n throw new Error(`Local LLM endpoint returned ${response.status}`);\n }\n\n const payload = (await response.json()) as {\n choices?: Array<{ message?: { content?: string } }>;\n };\n const content = payload.choices?.[0]?.message?.content;\n if (typeof content !== 'string') {\n throw new Error('Local LLM response missing message content');\n }\n\n return { text: content, citations: extractCitations(content) };\n}\n\nfunction extractCitations(text: string): number[] {\n const matches = text.match(/\\[(\\d+)\\]/g);\n if (!matches) return [];\n const seen = new Set<number>();\n const out: number[] = [];\n for (const m of matches) {\n const n = Number(m.slice(1, -1));\n if (!Number.isFinite(n) || n < 1) continue;\n const idx = n - 1;\n if (seen.has(idx)) continue;\n seen.add(idx);\n out.push(idx);\n }\n return out;\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,yBAAyB;AAElC,MAAM,MAAM,aAAa,UAAU;AAEnC,MAAM,sBAAsB;AAC5B,MAAM,+BAA+B;AACrC,MAAM,qBAAqB;AAmB3B,eAAsB,gBACpB,UACA,SACA,OAA8B,CAAC,GACA;AAC/B,MAAI,CAAC,kBAAkB,GAAG;AACxB,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACtE;AAEA,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,oBAAoB,KAAK,qBAAqB;AACpD,QAAM,YAAY,KAAK,aAAa;AAEpC,QAAM,WAAW,QAAQ,IAAI,qBAAqB;AAClD,QAAM,WAAW,SAAS,SAAS,mBAAmB,IAClD,WACA,SAAS,QAAQ,QAAQ,EAAE,IAAI;AACnC,QAAM,QAAQ,QAAQ,IAAI,kBAAkB,KAAK;AAEjD,QAAM,SAAS,QAAQ,MAAM,GAAG,UAAU;AAC1C,QAAM,eAAe,OAAO,IAAI,CAAC,GAAG,MAAM;AACxC,UAAMA,QAAO,EAAE,SAAS,SAAS,oBAC7B,EAAE,SAAS,MAAM,GAAG,iBAAiB,IACrC,EAAE;AACN,WAAO,IAAI,IAAI,CAAC,KAAK,EAAE,KAAK;AAAA,EAAKA,KAAI;AAAA,EACvC,CAAC;AAED,QAAM,SACJ;AAAA;AAAA,YACa,QAAQ;AAAA;AAAA;AAAA,EACR,aAAa,KAAK,MAAM,CAAC;AAExC,QAAM,OAAO;AAAA,IACX;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,EAC9C;AAEA,MAAI;AACJ,MAAI;AACF,eAAW,MAAM,MAAM,UAAU;AAAA,MAC/B,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,SAAS;AAAA,IACvC,CAAC;AAAA,EACH,SAAS,KAAK;AACZ,QAAI,MAAM,kCAAkC,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAClE,UAAM;AAAA,EACR;AAEA,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,IAAI,MAAM,+BAA+B,SAAS,MAAM,EAAE;AAAA,EAClE;AAEA,QAAM,UAAW,MAAM,SAAS,KAAK;AAGrC,QAAM,UAAU,QAAQ,UAAU,CAAC,GAAG,SAAS;AAC/C,MAAI,OAAO,YAAY,UAAU;AAC/B,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAEA,SAAO,EAAE,MAAM,SAAS,WAAW,iBAAiB,OAAO,EAAE;AAC/D;AAEA,SAAS,iBAAiB,MAAwB;AAChD,QAAM,UAAU,KAAK,MAAM,YAAY;AACvC,MAAI,CAAC,QAAS,QAAO,CAAC;AACtB,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,MAAgB,CAAC;AACvB,aAAW,KAAK,SAAS;AACvB,UAAM,IAAI,OAAO,EAAE,MAAM,GAAG,EAAE,CAAC;AAC/B,QAAI,CAAC,OAAO,SAAS,CAAC,KAAK,IAAI,EAAG;AAClC,UAAM,MAAM,IAAI;AAChB,QAAI,KAAK,IAAI,GAAG,EAAG;AACnB,SAAK,IAAI,GAAG;AACZ,QAAI,KAAK,GAAG;AAAA,EACd;AACA,SAAO;AACT;","names":["body"]}
1
+ {"version":3,"sources":["../../src/research/synthesis-local.ts"],"sourcesContent":["import { createLogger } from '../logger.js';\nimport { isLlmConfigured, runLlmText } from '../integrations/cloud/llm/run.js';\n\nconst log = createLogger('research');\n\nconst DEFAULT_MAX_SOURCES = 8;\nconst DEFAULT_MAX_CHARS_PER_SOURCE = 4000;\nconst DEFAULT_TIMEOUT_MS = 60_000;\nconst DEFAULT_MAX_TOKENS = 3000;\n\nexport interface LocalSynthesisOptions {\n maxSources?: number;\n maxCharsPerSource?: number;\n timeoutMs?: number;\n maxTokens?: number;\n modelOverride?: string;\n}\n\nexport interface LocalSynthesisSource {\n url: string;\n title: string;\n markdown: string;\n}\n\nexport interface LocalSynthesisResult {\n text: string;\n citations: number[];\n}\n\nexport async function synthesizeLocal(\n question: string,\n sources: LocalSynthesisSource[],\n opts: LocalSynthesisOptions = {},\n): Promise<LocalSynthesisResult> {\n if (!isLlmConfigured()) {\n throw new Error('LLM not configured. Set WIGOLO_LLM_PROVIDER or a provider API key.');\n }\n\n const maxSources = opts.maxSources ?? DEFAULT_MAX_SOURCES;\n const maxCharsPerSource = opts.maxCharsPerSource ?? DEFAULT_MAX_CHARS_PER_SOURCE;\n\n const sliced = sources.slice(0, maxSources);\n const sourceBlocks = sliced.map((s, i) => {\n const body = s.markdown.length > maxCharsPerSource\n ? s.markdown.slice(0, maxCharsPerSource)\n : s.markdown;\n return `[${i + 1}] ${s.title}\\n${body}`;\n });\n\n const prompt =\n 'You answer questions using ONLY the provided sources. Cite each fact with [N] where N is the source number.\\n\\n' +\n `Question: ${question}\\n\\n` +\n `Sources:\\n${sourceBlocks.join('\\n\\n')}`;\n\n try {\n const result = await runLlmText({\n prompt,\n maxTokens: opts.maxTokens ?? DEFAULT_MAX_TOKENS,\n modelOverride: opts.modelOverride,\n timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,\n });\n log.info('local synthesis ok', { provider: result.provider, model: result.model, latencyMs: result.latencyMs });\n return { text: result.text, citations: extractCitations(result.text) };\n } catch (err) {\n log.error('local synthesis request failed', { error: err instanceof Error ? err.message : String(err) });\n throw err;\n }\n}\n\n// Backwards-compat shim callers (research/pipeline.ts) used isLocalLlmEnabled()\n// to gate this fallback. Keep the same gate name pointing at the unified runner.\nexport function isLocalLlmEnabled(): boolean {\n return isLlmConfigured();\n}\n\nfunction extractCitations(text: string): number[] {\n const matches = text.match(/\\[(\\d+)\\]/g);\n if (!matches) return [];\n const seen = new Set<number>();\n const out: number[] = [];\n for (const m of matches) {\n const n = Number(m.slice(1, -1));\n if (!Number.isFinite(n) || n < 1) continue;\n const idx = n - 1;\n if (seen.has(idx)) continue;\n seen.add(idx);\n out.push(idx);\n }\n return out;\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,iBAAiB,kBAAkB;AAE5C,MAAM,MAAM,aAAa,UAAU;AAEnC,MAAM,sBAAsB;AAC5B,MAAM,+BAA+B;AACrC,MAAM,qBAAqB;AAC3B,MAAM,qBAAqB;AAqB3B,eAAsB,gBACpB,UACA,SACA,OAA8B,CAAC,GACA;AAC/B,MAAI,CAAC,gBAAgB,GAAG;AACtB,UAAM,IAAI,MAAM,oEAAoE;AAAA,EACtF;AAEA,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,oBAAoB,KAAK,qBAAqB;AAEpD,QAAM,SAAS,QAAQ,MAAM,GAAG,UAAU;AAC1C,QAAM,eAAe,OAAO,IAAI,CAAC,GAAG,MAAM;AACxC,UAAM,OAAO,EAAE,SAAS,SAAS,oBAC7B,EAAE,SAAS,MAAM,GAAG,iBAAiB,IACrC,EAAE;AACN,WAAO,IAAI,IAAI,CAAC,KAAK,EAAE,KAAK;AAAA,EAAK,IAAI;AAAA,EACvC,CAAC;AAED,QAAM,SACJ;AAAA;AAAA,YACa,QAAQ;AAAA;AAAA;AAAA,EACR,aAAa,KAAK,MAAM,CAAC;AAExC,MAAI;AACF,UAAM,SAAS,MAAM,WAAW;AAAA,MAC9B;AAAA,MACA,WAAW,KAAK,aAAa;AAAA,MAC7B,eAAe,KAAK;AAAA,MACpB,WAAW,KAAK,aAAa;AAAA,IAC/B,CAAC;AACD,QAAI,KAAK,sBAAsB,EAAE,UAAU,OAAO,UAAU,OAAO,OAAO,OAAO,WAAW,OAAO,UAAU,CAAC;AAC9G,WAAO,EAAE,MAAM,OAAO,MAAM,WAAW,iBAAiB,OAAO,IAAI,EAAE;AAAA,EACvE,SAAS,KAAK;AACZ,QAAI,MAAM,kCAAkC,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AACvG,UAAM;AAAA,EACR;AACF;AAIO,SAAS,oBAA6B;AAC3C,SAAO,gBAAgB;AACzB;AAEA,SAAS,iBAAiB,MAAwB;AAChD,QAAM,UAAU,KAAK,MAAM,YAAY;AACvC,MAAI,CAAC,QAAS,QAAO,CAAC;AACtB,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,MAAgB,CAAC;AACvB,aAAW,KAAK,SAAS;AACvB,UAAM,IAAI,OAAO,EAAE,MAAM,GAAG,EAAE,CAAC;AAC/B,QAAI,CAAC,OAAO,SAAS,CAAC,KAAK,IAAI,EAAG;AAClC,UAAM,MAAM,IAAI;AAChB,QAAI,KAAK,IAAI,GAAG,EAAG;AACnB,SAAK,IAAI,GAAG;AACZ,QAAI,KAAK,GAAG;AAAA,EACd;AACA,SAAO;AACT;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"filters.d.ts","sourceRoot":"","sources":["../../src/search/filters.ts"],"names":[],"mappings":"AAiBA,wBAAgB,eAAe,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EACvD,OAAO,EAAE,CAAC,EAAE,EACZ,cAAc,CAAC,EAAE,MAAM,EAAE,EACzB,cAAc,CAAC,EAAE,MAAM,EAAE,GACxB,CAAC,EAAE,CAgBL;AAOD,wBAAgB,iBAAiB,CAAC,CAAC,EACjC,OAAO,EAAE,CAAC,EAAE,EACZ,QAAQ,CAAC,EAAE,MAAM,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,CAAC,EAAE,CAYL;AAED,wBAAgB,gBAAgB,CAAC,CAAC,EAChC,OAAO,EAAE,CAAC,EAAE,EACZ,SAAS,CAAC,EAAE,MAAM,GACjB,CAAC,EAAE,CAGL;AAED,MAAM,WAAW,aAAa;IAC5B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,eAAe,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EACvD,OAAO,EAAE,CAAC,EAAE,EACZ,OAAO,EAAE,aAAa,GACrB,CAAC,EAAE,CAKL"}
1
+ {"version":3,"file":"filters.d.ts","sourceRoot":"","sources":["../../src/search/filters.ts"],"names":[],"mappings":"AAiBA,wBAAgB,eAAe,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EACvD,OAAO,EAAE,CAAC,EAAE,EACZ,cAAc,CAAC,EAAE,MAAM,EAAE,EACzB,cAAc,CAAC,EAAE,MAAM,EAAE,GACxB,CAAC,EAAE,CAgBL;AAOD,wBAAgB,iBAAiB,CAAC,CAAC,EACjC,OAAO,EAAE,CAAC,EAAE,EACZ,QAAQ,CAAC,EAAE,MAAM,EACjB,MAAM,CAAC,EAAE,MAAM,GACd,CAAC,EAAE,CAyBL;AAED,wBAAgB,gBAAgB,CAAC,CAAC,EAChC,OAAO,EAAE,CAAC,EAAE,EACZ,SAAS,CAAC,EAAE,MAAM,GACjB,CAAC,EAAE,CAGL;AAED,MAAM,WAAW,aAAa;IAC5B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,eAAe,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EACvD,OAAO,EAAE,CAAC,EAAE,EACZ,OAAO,EAAE,aAAa,GACrB,CAAC,EAAE,CAKL"}
@@ -37,7 +37,17 @@ function filterByDateRange(results, fromDate, toDate) {
37
37
  if (fromDate && !isValidIsoDate(fromDate)) return results;
38
38
  if (toDate && !isValidIsoDate(toDate)) return results;
39
39
  if (fromDate && toDate && new Date(fromDate) > new Date(toDate)) return results;
40
- return results;
40
+ const fromMs = fromDate ? new Date(fromDate).getTime() : null;
41
+ const toMs = toDate ? new Date(toDate).getTime() + 24 * 3600 * 1e3 - 1 : null;
42
+ return results.filter((r) => {
43
+ const published = r.published_date;
44
+ if (typeof published !== "string" || !published) return true;
45
+ const t = Date.parse(published);
46
+ if (isNaN(t)) return true;
47
+ if (fromMs !== null && t < fromMs) return false;
48
+ if (toMs !== null && t > toMs) return false;
49
+ return true;
50
+ });
41
51
  }
42
52
  function filterByCategory(results, _category) {
43
53
  return results;
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/search/filters.ts"],"sourcesContent":["function getDomain(url: string): string {\n try {\n return new URL(url).hostname.toLowerCase();\n } catch {\n return '';\n }\n}\n\nfunction normalizeDomain(domain: string): string {\n return domain.replace(/\\/+$/, '').toLowerCase();\n}\n\nfunction domainMatches(hostname: string, domain: string): boolean {\n const normalized = normalizeDomain(domain);\n return hostname === normalized || hostname.endsWith('.' + normalized);\n}\n\nexport function filterByDomains<T extends { url: string }>(\n results: T[],\n includeDomains?: string[],\n excludeDomains?: string[],\n): T[] {\n if (!includeDomains?.length && !excludeDomains?.length) return results;\n\n return results.filter((r) => {\n const hostname = getDomain(r.url);\n if (!hostname) {\n return !includeDomains?.length;\n }\n if (includeDomains?.length) {\n if (!includeDomains.some((d) => domainMatches(hostname, d))) return false;\n }\n if (excludeDomains?.length) {\n if (excludeDomains.some((d) => domainMatches(hostname, d))) return false;\n }\n return true;\n });\n}\n\nfunction isValidIsoDate(dateStr: string): boolean {\n const parsed = new Date(dateStr);\n return !isNaN(parsed.getTime()) && /^\\d{4}-\\d{2}-\\d{2}$/.test(dateStr);\n}\n\nexport function filterByDateRange<T>(\n results: T[],\n fromDate?: string,\n toDate?: string,\n): T[] {\n if (!fromDate && !toDate) return results;\n\n if (fromDate && !isValidIsoDate(fromDate)) return results;\n if (toDate && !isValidIsoDate(toDate)) return results;\n\n if (fromDate && toDate && new Date(fromDate) > new Date(toDate)) return results;\n\n // Date filtering is best-effort on direct scraping engines.\n // SearXNG handles dates natively via time_range. For fallback engines,\n // snippet text doesn't reliably contain dates, so we keep all results.\n return results;\n}\n\nexport function filterByCategory<T>(\n results: T[],\n _category?: string,\n): T[] {\n // Category filtering is handled by SearXNG natively.\n return results;\n}\n\nexport interface FilterOptions {\n includeDomains?: string[];\n excludeDomains?: string[];\n fromDate?: string;\n toDate?: string;\n category?: string;\n}\n\nexport function applyAllFilters<T extends { url: string }>(\n results: T[],\n options: FilterOptions,\n): T[] {\n let filtered = filterByDomains(results, options.includeDomains, options.excludeDomains);\n filtered = filterByDateRange(filtered, options.fromDate, options.toDate);\n filtered = filterByCategory(filtered, options.category);\n return filtered;\n}\n"],"mappings":"AAAA,SAAS,UAAU,KAAqB;AACtC,MAAI;AACF,WAAO,IAAI,IAAI,GAAG,EAAE,SAAS,YAAY;AAAA,EAC3C,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,gBAAgB,QAAwB;AAC/C,SAAO,OAAO,QAAQ,QAAQ,EAAE,EAAE,YAAY;AAChD;AAEA,SAAS,cAAc,UAAkB,QAAyB;AAChE,QAAM,aAAa,gBAAgB,MAAM;AACzC,SAAO,aAAa,cAAc,SAAS,SAAS,MAAM,UAAU;AACtE;AAEO,SAAS,gBACd,SACA,gBACA,gBACK;AACL,MAAI,CAAC,gBAAgB,UAAU,CAAC,gBAAgB,OAAQ,QAAO;AAE/D,SAAO,QAAQ,OAAO,CAAC,MAAM;AAC3B,UAAM,WAAW,UAAU,EAAE,GAAG;AAChC,QAAI,CAAC,UAAU;AACb,aAAO,CAAC,gBAAgB;AAAA,IAC1B;AACA,QAAI,gBAAgB,QAAQ;AAC1B,UAAI,CAAC,eAAe,KAAK,CAAC,MAAM,cAAc,UAAU,CAAC,CAAC,EAAG,QAAO;AAAA,IACtE;AACA,QAAI,gBAAgB,QAAQ;AAC1B,UAAI,eAAe,KAAK,CAAC,MAAM,cAAc,UAAU,CAAC,CAAC,EAAG,QAAO;AAAA,IACrE;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAEA,SAAS,eAAe,SAA0B;AAChD,QAAM,SAAS,IAAI,KAAK,OAAO;AAC/B,SAAO,CAAC,MAAM,OAAO,QAAQ,CAAC,KAAK,sBAAsB,KAAK,OAAO;AACvE;AAEO,SAAS,kBACd,SACA,UACA,QACK;AACL,MAAI,CAAC,YAAY,CAAC,OAAQ,QAAO;AAEjC,MAAI,YAAY,CAAC,eAAe,QAAQ,EAAG,QAAO;AAClD,MAAI,UAAU,CAAC,eAAe,MAAM,EAAG,QAAO;AAE9C,MAAI,YAAY,UAAU,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,MAAM,EAAG,QAAO;AAKxE,SAAO;AACT;AAEO,SAAS,iBACd,SACA,WACK;AAEL,SAAO;AACT;AAUO,SAAS,gBACd,SACA,SACK;AACL,MAAI,WAAW,gBAAgB,SAAS,QAAQ,gBAAgB,QAAQ,cAAc;AACtF,aAAW,kBAAkB,UAAU,QAAQ,UAAU,QAAQ,MAAM;AACvE,aAAW,iBAAiB,UAAU,QAAQ,QAAQ;AACtD,SAAO;AACT;","names":[]}
1
+ {"version":3,"sources":["../../src/search/filters.ts"],"sourcesContent":["function getDomain(url: string): string {\n try {\n return new URL(url).hostname.toLowerCase();\n } catch {\n return '';\n }\n}\n\nfunction normalizeDomain(domain: string): string {\n return domain.replace(/\\/+$/, '').toLowerCase();\n}\n\nfunction domainMatches(hostname: string, domain: string): boolean {\n const normalized = normalizeDomain(domain);\n return hostname === normalized || hostname.endsWith('.' + normalized);\n}\n\nexport function filterByDomains<T extends { url: string }>(\n results: T[],\n includeDomains?: string[],\n excludeDomains?: string[],\n): T[] {\n if (!includeDomains?.length && !excludeDomains?.length) return results;\n\n return results.filter((r) => {\n const hostname = getDomain(r.url);\n if (!hostname) {\n return !includeDomains?.length;\n }\n if (includeDomains?.length) {\n if (!includeDomains.some((d) => domainMatches(hostname, d))) return false;\n }\n if (excludeDomains?.length) {\n if (excludeDomains.some((d) => domainMatches(hostname, d))) return false;\n }\n return true;\n });\n}\n\nfunction isValidIsoDate(dateStr: string): boolean {\n const parsed = new Date(dateStr);\n return !isNaN(parsed.getTime()) && /^\\d{4}-\\d{2}-\\d{2}$/.test(dateStr);\n}\n\nexport function filterByDateRange<T>(\n results: T[],\n fromDate?: string,\n toDate?: string,\n): T[] {\n if (!fromDate && !toDate) return results;\n\n if (fromDate && !isValidIsoDate(fromDate)) return results;\n if (toDate && !isValidIsoDate(toDate)) return results;\n\n if (fromDate && toDate && new Date(fromDate) > new Date(toDate)) return results;\n\n const fromMs = fromDate ? new Date(fromDate).getTime() : null;\n // toDate inclusive: treat as end-of-day so 'to=2026-01-31' keeps anything stamped Jan 31.\n const toMs = toDate ? new Date(toDate).getTime() + 24 * 3600 * 1000 - 1 : null;\n\n // Drop results with a published_date outside the window. Results without a\n // published_date pass through — SearXNG and most fallback engines do not\n // expose reliable dates per result, and the user's request for recency is\n // already biased via time_range on the upstream call.\n return results.filter((r) => {\n const published = (r as { published_date?: unknown }).published_date;\n if (typeof published !== 'string' || !published) return true;\n const t = Date.parse(published);\n if (isNaN(t)) return true;\n if (fromMs !== null && t < fromMs) return false;\n if (toMs !== null && t > toMs) return false;\n return true;\n });\n}\n\nexport function filterByCategory<T>(\n results: T[],\n _category?: string,\n): T[] {\n // Category filtering is handled by SearXNG natively.\n return results;\n}\n\nexport interface FilterOptions {\n includeDomains?: string[];\n excludeDomains?: string[];\n fromDate?: string;\n toDate?: string;\n category?: string;\n}\n\nexport function applyAllFilters<T extends { url: string }>(\n results: T[],\n options: FilterOptions,\n): T[] {\n let filtered = filterByDomains(results, options.includeDomains, options.excludeDomains);\n filtered = filterByDateRange(filtered, options.fromDate, options.toDate);\n filtered = filterByCategory(filtered, options.category);\n return filtered;\n}\n"],"mappings":"AAAA,SAAS,UAAU,KAAqB;AACtC,MAAI;AACF,WAAO,IAAI,IAAI,GAAG,EAAE,SAAS,YAAY;AAAA,EAC3C,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,SAAS,gBAAgB,QAAwB;AAC/C,SAAO,OAAO,QAAQ,QAAQ,EAAE,EAAE,YAAY;AAChD;AAEA,SAAS,cAAc,UAAkB,QAAyB;AAChE,QAAM,aAAa,gBAAgB,MAAM;AACzC,SAAO,aAAa,cAAc,SAAS,SAAS,MAAM,UAAU;AACtE;AAEO,SAAS,gBACd,SACA,gBACA,gBACK;AACL,MAAI,CAAC,gBAAgB,UAAU,CAAC,gBAAgB,OAAQ,QAAO;AAE/D,SAAO,QAAQ,OAAO,CAAC,MAAM;AAC3B,UAAM,WAAW,UAAU,EAAE,GAAG;AAChC,QAAI,CAAC,UAAU;AACb,aAAO,CAAC,gBAAgB;AAAA,IAC1B;AACA,QAAI,gBAAgB,QAAQ;AAC1B,UAAI,CAAC,eAAe,KAAK,CAAC,MAAM,cAAc,UAAU,CAAC,CAAC,EAAG,QAAO;AAAA,IACtE;AACA,QAAI,gBAAgB,QAAQ;AAC1B,UAAI,eAAe,KAAK,CAAC,MAAM,cAAc,UAAU,CAAC,CAAC,EAAG,QAAO;AAAA,IACrE;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAEA,SAAS,eAAe,SAA0B;AAChD,QAAM,SAAS,IAAI,KAAK,OAAO;AAC/B,SAAO,CAAC,MAAM,OAAO,QAAQ,CAAC,KAAK,sBAAsB,KAAK,OAAO;AACvE;AAEO,SAAS,kBACd,SACA,UACA,QACK;AACL,MAAI,CAAC,YAAY,CAAC,OAAQ,QAAO;AAEjC,MAAI,YAAY,CAAC,eAAe,QAAQ,EAAG,QAAO;AAClD,MAAI,UAAU,CAAC,eAAe,MAAM,EAAG,QAAO;AAE9C,MAAI,YAAY,UAAU,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,MAAM,EAAG,QAAO;AAExE,QAAM,SAAS,WAAW,IAAI,KAAK,QAAQ,EAAE,QAAQ,IAAI;AAEzD,QAAM,OAAO,SAAS,IAAI,KAAK,MAAM,EAAE,QAAQ,IAAI,KAAK,OAAO,MAAO,IAAI;AAM1E,SAAO,QAAQ,OAAO,CAAC,MAAM;AAC3B,UAAM,YAAa,EAAmC;AACtD,QAAI,OAAO,cAAc,YAAY,CAAC,UAAW,QAAO;AACxD,UAAM,IAAI,KAAK,MAAM,SAAS;AAC9B,QAAI,MAAM,CAAC,EAAG,QAAO;AACrB,QAAI,WAAW,QAAQ,IAAI,OAAQ,QAAO;AAC1C,QAAI,SAAS,QAAQ,IAAI,KAAM,QAAO;AACtC,WAAO;AAAA,EACT,CAAC;AACH;AAEO,SAAS,iBACd,SACA,WACK;AAEL,SAAO;AACT;AAUO,SAAS,gBACd,SACA,SACK;AACL,MAAI,WAAW,gBAAgB,SAAS,QAAQ,gBAAgB,QAAQ,cAAc;AACtF,aAAW,kBAAkB,UAAU,QAAQ,UAAU,QAAQ,MAAM;AACvE,aAAW,iBAAiB,UAAU,QAAQ,QAAQ;AACtD,SAAO;AACT;","names":[]}
@@ -8,5 +8,6 @@ export declare class TransformersRerankProvider implements RerankProvider {
8
8
  warmup(): Promise<void>;
9
9
  private load;
10
10
  rerank(query: string, candidates: RerankCandidate[], topK?: number): Promise<RerankResult[]>;
11
+ dispose(): Promise<void>;
11
12
  }
12
13
  //# sourceMappingURL=transformers-rerank-provider.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"transformers-rerank-provider.d.ts","sourceRoot":"","sources":["../../../src/search/reranker/transformers-rerank-provider.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EACV,cAAc,EACd,eAAe,EACf,YAAY,EACb,MAAM,oCAAoC,CAAC;AAwC5C,qBAAa,0BAA2B,YAAW,cAAc;IAC/D,OAAO,CAAC,SAAS,CAA0B;IAC3C,OAAO,CAAC,KAAK,CAAsB;IACnC,OAAO,CAAC,WAAW,CAAgE;IACnF,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;;IAMnB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,OAAO,CAAC,IAAI;IA6BN,MAAM,CACV,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,eAAe,EAAE,EAC7B,IAAI,SAAoB,GACvB,OAAO,CAAC,YAAY,EAAE,CAAC;CA8B3B"}
1
+ {"version":3,"file":"transformers-rerank-provider.d.ts","sourceRoot":"","sources":["../../../src/search/reranker/transformers-rerank-provider.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EACV,cAAc,EACd,eAAe,EACf,YAAY,EACb,MAAM,oCAAoC,CAAC;AAwC5C,qBAAa,0BAA2B,YAAW,cAAc;IAC/D,OAAO,CAAC,SAAS,CAA0B;IAC3C,OAAO,CAAC,KAAK,CAAsB;IACnC,OAAO,CAAC,WAAW,CAAgE;IACnF,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;;IAMnB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,OAAO,CAAC,IAAI;IA6BN,MAAM,CACV,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,eAAe,EAAE,EAC7B,IAAI,SAAoB,GACvB,OAAO,CAAC,YAAY,EAAE,CAAC;IAkCpB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAa/B"}
@@ -71,6 +71,22 @@ class TransformersRerankProvider {
71
71
  }));
72
72
  return scored.sort((a, b) => b.score - a.score).slice(0, topK);
73
73
  }
74
+ // Release the underlying ONNX session before process exit. Without this,
75
+ // the runtime's worker threads race during C++ destructor teardown and
76
+ // surface as `mutex lock failed: Invalid argument` on macOS.
77
+ async dispose() {
78
+ const model = this.model;
79
+ this.model = null;
80
+ this.tokenizer = null;
81
+ this.loadPromise = null;
82
+ if (!model) return;
83
+ try {
84
+ const m = model;
85
+ if (typeof m.dispose === "function") await m.dispose();
86
+ } catch (err) {
87
+ log.debug("reranker dispose failed", { error: err instanceof Error ? err.message : String(err) });
88
+ }
89
+ }
74
90
  }
75
91
  export {
76
92
  TransformersRerankProvider
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../src/search/reranker/transformers-rerank-provider.ts"],"sourcesContent":["import { join } from 'node:path';\nimport {\n AutoTokenizer,\n AutoModelForSequenceClassification,\n env,\n} from '@huggingface/transformers';\nimport type {\n RerankProvider,\n RerankCandidate,\n RerankResult,\n} from '../../providers/rerank-provider.js';\nimport { createLogger } from '../../logger.js';\nimport { getConfig } from '../../config.js';\n\nconst log = createLogger('reranker');\n\n// Cross-encoder reranker via Transformers.js.\n//\n// The high-level `pipeline('text-classification', ...)` API does not pass\n// `text_pair`, so it can't drive a cross-encoder properly. We therefore\n// load the tokenizer + sequence-classification model directly: feed\n// (query, document) pairs to the tokenizer and read raw logits from the\n// model. ms-marco-MiniLM-L-6-v2 is a single-output regressor (num_labels=1)\n// where higher logit = more relevant, so the logit is used as the rerank\n// score with no further transform.\ntype Tokenizer = Awaited<ReturnType<typeof AutoTokenizer.from_pretrained>>;\ntype Model = Awaited<ReturnType<typeof AutoModelForSequenceClassification.from_pretrained>>;\n\ninterface LogitsTensor {\n data: ArrayLike<number>;\n dims: number[];\n}\n\n// Recognize the noisy huggingface fetch failure signature and replace it\n// with an actionable instruction. Transformers.js parses a config that\n// failed to download, then dereferences `tokenizer_class` on undefined.\nfunction wrapLoadError(err: unknown): Error {\n const message = err instanceof Error ? err.message : String(err);\n const looksLikeMissingModel =\n /tokenizer_class|tokenizer_config|preprocessor_config|fetch failed|ENOTFOUND|ECONNREFUSED|ETIMEDOUT|ENETUNREACH/i.test(\n message,\n );\n if (looksLikeMissingModel) {\n return new Error(\n `Reranker model not downloaded — run \\`wigolo warmup\\` (cause: ${message})`,\n );\n }\n return new Error(`Failed to load reranker model: ${message}`);\n}\n\nexport class TransformersRerankProvider implements RerankProvider {\n private tokenizer: Tokenizer | null = null;\n private model: Model | null = null;\n private loadPromise: Promise<{ tokenizer: Tokenizer; model: Model }> | null = null;\n readonly modelId: string;\n\n constructor() {\n this.modelId = 'Xenova/ms-marco-MiniLM-L-6-v2';\n }\n\n async warmup(): Promise<void> {\n await this.load();\n }\n\n private load(): Promise<{ tokenizer: Tokenizer; model: Model }> {\n if (this.tokenizer && this.model) {\n return Promise.resolve({ tokenizer: this.tokenizer, model: this.model });\n }\n if (this.loadPromise) return this.loadPromise;\n\n log.info('Loading rerank model', { modelId: this.modelId });\n const cacheDir = join(getConfig().dataDir, 'transformers');\n // Direct the library at a writable cache under the wigolo data dir so\n // models don't end up in a user home cache the daemon can't manage.\n env.cacheDir = cacheDir;\n\n this.loadPromise = Promise.all([\n AutoTokenizer.from_pretrained(this.modelId),\n AutoModelForSequenceClassification.from_pretrained(this.modelId),\n ])\n .then(([tokenizer, model]) => {\n this.tokenizer = tokenizer;\n this.model = model;\n return { tokenizer, model };\n })\n .catch((err: unknown) => {\n this.loadPromise = null;\n throw wrapLoadError(err);\n });\n\n return this.loadPromise;\n }\n\n async rerank(\n query: string,\n candidates: RerankCandidate[],\n topK = candidates.length,\n ): Promise<RerankResult[]> {\n if (candidates.length === 0) return [];\n\n const { tokenizer, model } = await this.load();\n\n // Build batch: query repeated against each document.\n const queries = candidates.map(() => query);\n const docs = candidates.map((c) => c.text);\n\n const inputs = tokenizer(queries, {\n text_pair: docs,\n padding: true,\n truncation: true,\n });\n\n const outputs = (await model(inputs)) as { logits: LogitsTensor };\n const logits = outputs.logits;\n // logits shape is [batch, 1] for single-label regression rerankers.\n // For multi-label heads (rare for rerankers) we still take the first\n // value as the relevance score.\n const stride = logits.dims.length >= 2 ? logits.dims[1] : 1;\n const data = logits.data;\n\n const scored: RerankResult[] = candidates.map((c, i) => ({\n id: c.id,\n score: Number(data[i * stride]),\n }));\n\n return scored.sort((a, b) => b.score - a.score).slice(0, topK);\n }\n}\n"],"mappings":"AAAA,SAAS,YAAY;AACrB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAMP,SAAS,oBAAoB;AAC7B,SAAS,iBAAiB;AAE1B,MAAM,MAAM,aAAa,UAAU;AAsBnC,SAAS,cAAc,KAAqB;AAC1C,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,QAAM,wBACJ,kHAAkH;AAAA,IAChH;AAAA,EACF;AACF,MAAI,uBAAuB;AACzB,WAAO,IAAI;AAAA,MACT,sEAAiE,OAAO;AAAA,IAC1E;AAAA,EACF;AACA,SAAO,IAAI,MAAM,kCAAkC,OAAO,EAAE;AAC9D;AAEO,MAAM,2BAAqD;AAAA,EACxD,YAA8B;AAAA,EAC9B,QAAsB;AAAA,EACtB,cAAsE;AAAA,EACrE;AAAA,EAET,cAAc;AACZ,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,MAAM,SAAwB;AAC5B,UAAM,KAAK,KAAK;AAAA,EAClB;AAAA,EAEQ,OAAwD;AAC9D,QAAI,KAAK,aAAa,KAAK,OAAO;AAChC,aAAO,QAAQ,QAAQ,EAAE,WAAW,KAAK,WAAW,OAAO,KAAK,MAAM,CAAC;AAAA,IACzE;AACA,QAAI,KAAK,YAAa,QAAO,KAAK;AAElC,QAAI,KAAK,wBAAwB,EAAE,SAAS,KAAK,QAAQ,CAAC;AAC1D,UAAM,WAAW,KAAK,UAAU,EAAE,SAAS,cAAc;AAGzD,QAAI,WAAW;AAEf,SAAK,cAAc,QAAQ,IAAI;AAAA,MAC7B,cAAc,gBAAgB,KAAK,OAAO;AAAA,MAC1C,mCAAmC,gBAAgB,KAAK,OAAO;AAAA,IACjE,CAAC,EACE,KAAK,CAAC,CAAC,WAAW,KAAK,MAAM;AAC5B,WAAK,YAAY;AACjB,WAAK,QAAQ;AACb,aAAO,EAAE,WAAW,MAAM;AAAA,IAC5B,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,cAAc;AACnB,YAAM,cAAc,GAAG;AAAA,IACzB,CAAC;AAEH,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,OACJ,OACA,YACA,OAAO,WAAW,QACO;AACzB,QAAI,WAAW,WAAW,EAAG,QAAO,CAAC;AAErC,UAAM,EAAE,WAAW,MAAM,IAAI,MAAM,KAAK,KAAK;AAG7C,UAAM,UAAU,WAAW,IAAI,MAAM,KAAK;AAC1C,UAAM,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,IAAI;AAEzC,UAAM,SAAS,UAAU,SAAS;AAAA,MAChC,WAAW;AAAA,MACX,SAAS;AAAA,MACT,YAAY;AAAA,IACd,CAAC;AAED,UAAM,UAAW,MAAM,MAAM,MAAM;AACnC,UAAM,SAAS,QAAQ;AAIvB,UAAM,SAAS,OAAO,KAAK,UAAU,IAAI,OAAO,KAAK,CAAC,IAAI;AAC1D,UAAM,OAAO,OAAO;AAEpB,UAAM,SAAyB,WAAW,IAAI,CAAC,GAAG,OAAO;AAAA,MACvD,IAAI,EAAE;AAAA,MACN,OAAO,OAAO,KAAK,IAAI,MAAM,CAAC;AAAA,IAChC,EAAE;AAEF,WAAO,OAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;AAAA,EAC/D;AACF;","names":[]}
1
+ {"version":3,"sources":["../../../src/search/reranker/transformers-rerank-provider.ts"],"sourcesContent":["import { join } from 'node:path';\nimport {\n AutoTokenizer,\n AutoModelForSequenceClassification,\n env,\n} from '@huggingface/transformers';\nimport type {\n RerankProvider,\n RerankCandidate,\n RerankResult,\n} from '../../providers/rerank-provider.js';\nimport { createLogger } from '../../logger.js';\nimport { getConfig } from '../../config.js';\n\nconst log = createLogger('reranker');\n\n// Cross-encoder reranker via Transformers.js.\n//\n// The high-level `pipeline('text-classification', ...)` API does not pass\n// `text_pair`, so it can't drive a cross-encoder properly. We therefore\n// load the tokenizer + sequence-classification model directly: feed\n// (query, document) pairs to the tokenizer and read raw logits from the\n// model. ms-marco-MiniLM-L-6-v2 is a single-output regressor (num_labels=1)\n// where higher logit = more relevant, so the logit is used as the rerank\n// score with no further transform.\ntype Tokenizer = Awaited<ReturnType<typeof AutoTokenizer.from_pretrained>>;\ntype Model = Awaited<ReturnType<typeof AutoModelForSequenceClassification.from_pretrained>>;\n\ninterface LogitsTensor {\n data: ArrayLike<number>;\n dims: number[];\n}\n\n// Recognize the noisy huggingface fetch failure signature and replace it\n// with an actionable instruction. Transformers.js parses a config that\n// failed to download, then dereferences `tokenizer_class` on undefined.\nfunction wrapLoadError(err: unknown): Error {\n const message = err instanceof Error ? err.message : String(err);\n const looksLikeMissingModel =\n /tokenizer_class|tokenizer_config|preprocessor_config|fetch failed|ENOTFOUND|ECONNREFUSED|ETIMEDOUT|ENETUNREACH/i.test(\n message,\n );\n if (looksLikeMissingModel) {\n return new Error(\n `Reranker model not downloaded — run \\`wigolo warmup\\` (cause: ${message})`,\n );\n }\n return new Error(`Failed to load reranker model: ${message}`);\n}\n\nexport class TransformersRerankProvider implements RerankProvider {\n private tokenizer: Tokenizer | null = null;\n private model: Model | null = null;\n private loadPromise: Promise<{ tokenizer: Tokenizer; model: Model }> | null = null;\n readonly modelId: string;\n\n constructor() {\n this.modelId = 'Xenova/ms-marco-MiniLM-L-6-v2';\n }\n\n async warmup(): Promise<void> {\n await this.load();\n }\n\n private load(): Promise<{ tokenizer: Tokenizer; model: Model }> {\n if (this.tokenizer && this.model) {\n return Promise.resolve({ tokenizer: this.tokenizer, model: this.model });\n }\n if (this.loadPromise) return this.loadPromise;\n\n log.info('Loading rerank model', { modelId: this.modelId });\n const cacheDir = join(getConfig().dataDir, 'transformers');\n // Direct the library at a writable cache under the wigolo data dir so\n // models don't end up in a user home cache the daemon can't manage.\n env.cacheDir = cacheDir;\n\n this.loadPromise = Promise.all([\n AutoTokenizer.from_pretrained(this.modelId),\n AutoModelForSequenceClassification.from_pretrained(this.modelId),\n ])\n .then(([tokenizer, model]) => {\n this.tokenizer = tokenizer;\n this.model = model;\n return { tokenizer, model };\n })\n .catch((err: unknown) => {\n this.loadPromise = null;\n throw wrapLoadError(err);\n });\n\n return this.loadPromise;\n }\n\n async rerank(\n query: string,\n candidates: RerankCandidate[],\n topK = candidates.length,\n ): Promise<RerankResult[]> {\n if (candidates.length === 0) return [];\n\n const { tokenizer, model } = await this.load();\n\n // Build batch: query repeated against each document.\n const queries = candidates.map(() => query);\n const docs = candidates.map((c) => c.text);\n\n const inputs = tokenizer(queries, {\n text_pair: docs,\n padding: true,\n truncation: true,\n });\n\n const outputs = (await model(inputs)) as { logits: LogitsTensor };\n const logits = outputs.logits;\n // logits shape is [batch, 1] for single-label regression rerankers.\n // For multi-label heads (rare for rerankers) we still take the first\n // value as the relevance score.\n const stride = logits.dims.length >= 2 ? logits.dims[1] : 1;\n const data = logits.data;\n\n const scored: RerankResult[] = candidates.map((c, i) => ({\n id: c.id,\n score: Number(data[i * stride]),\n }));\n\n return scored.sort((a, b) => b.score - a.score).slice(0, topK);\n }\n\n // Release the underlying ONNX session before process exit. Without this,\n // the runtime's worker threads race during C++ destructor teardown and\n // surface as `mutex lock failed: Invalid argument` on macOS.\n async dispose(): Promise<void> {\n const model = this.model;\n this.model = null;\n this.tokenizer = null;\n this.loadPromise = null;\n if (!model) return;\n try {\n const m = model as unknown as { dispose?: () => Promise<unknown> };\n if (typeof m.dispose === 'function') await m.dispose();\n } catch (err) {\n log.debug('reranker dispose failed', { error: err instanceof Error ? err.message : String(err) });\n }\n }\n}\n"],"mappings":"AAAA,SAAS,YAAY;AACrB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAMP,SAAS,oBAAoB;AAC7B,SAAS,iBAAiB;AAE1B,MAAM,MAAM,aAAa,UAAU;AAsBnC,SAAS,cAAc,KAAqB;AAC1C,QAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,QAAM,wBACJ,kHAAkH;AAAA,IAChH;AAAA,EACF;AACF,MAAI,uBAAuB;AACzB,WAAO,IAAI;AAAA,MACT,sEAAiE,OAAO;AAAA,IAC1E;AAAA,EACF;AACA,SAAO,IAAI,MAAM,kCAAkC,OAAO,EAAE;AAC9D;AAEO,MAAM,2BAAqD;AAAA,EACxD,YAA8B;AAAA,EAC9B,QAAsB;AAAA,EACtB,cAAsE;AAAA,EACrE;AAAA,EAET,cAAc;AACZ,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,MAAM,SAAwB;AAC5B,UAAM,KAAK,KAAK;AAAA,EAClB;AAAA,EAEQ,OAAwD;AAC9D,QAAI,KAAK,aAAa,KAAK,OAAO;AAChC,aAAO,QAAQ,QAAQ,EAAE,WAAW,KAAK,WAAW,OAAO,KAAK,MAAM,CAAC;AAAA,IACzE;AACA,QAAI,KAAK,YAAa,QAAO,KAAK;AAElC,QAAI,KAAK,wBAAwB,EAAE,SAAS,KAAK,QAAQ,CAAC;AAC1D,UAAM,WAAW,KAAK,UAAU,EAAE,SAAS,cAAc;AAGzD,QAAI,WAAW;AAEf,SAAK,cAAc,QAAQ,IAAI;AAAA,MAC7B,cAAc,gBAAgB,KAAK,OAAO;AAAA,MAC1C,mCAAmC,gBAAgB,KAAK,OAAO;AAAA,IACjE,CAAC,EACE,KAAK,CAAC,CAAC,WAAW,KAAK,MAAM;AAC5B,WAAK,YAAY;AACjB,WAAK,QAAQ;AACb,aAAO,EAAE,WAAW,MAAM;AAAA,IAC5B,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,cAAc;AACnB,YAAM,cAAc,GAAG;AAAA,IACzB,CAAC;AAEH,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,OACJ,OACA,YACA,OAAO,WAAW,QACO;AACzB,QAAI,WAAW,WAAW,EAAG,QAAO,CAAC;AAErC,UAAM,EAAE,WAAW,MAAM,IAAI,MAAM,KAAK,KAAK;AAG7C,UAAM,UAAU,WAAW,IAAI,MAAM,KAAK;AAC1C,UAAM,OAAO,WAAW,IAAI,CAAC,MAAM,EAAE,IAAI;AAEzC,UAAM,SAAS,UAAU,SAAS;AAAA,MAChC,WAAW;AAAA,MACX,SAAS;AAAA,MACT,YAAY;AAAA,IACd,CAAC;AAED,UAAM,UAAW,MAAM,MAAM,MAAM;AACnC,UAAM,SAAS,QAAQ;AAIvB,UAAM,SAAS,OAAO,KAAK,UAAU,IAAI,OAAO,KAAK,CAAC,IAAI;AAC1D,UAAM,OAAO,OAAO;AAEpB,UAAM,SAAyB,WAAW,IAAI,CAAC,GAAG,OAAO;AAAA,MACvD,IAAI,EAAE;AAAA,MACN,OAAO,OAAO,KAAK,IAAI,MAAM,CAAC;AAAA,IAChC,EAAE;AAEF,WAAO,OAAO,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;AAAA,EAC/D;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,UAAyB;AAC7B,UAAM,QAAQ,KAAK;AACnB,SAAK,QAAQ;AACb,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,QAAI,CAAC,MAAO;AACZ,QAAI;AACF,YAAM,IAAI;AACV,UAAI,OAAO,EAAE,YAAY,WAAY,OAAM,EAAE,QAAQ;AAAA,IACvD,SAAS,KAAK;AACZ,UAAI,MAAM,2BAA2B,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AAAA,IAClG;AAAA,EACF;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/tools/cache.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAiC,MAAM,aAAa,CAAC;AAC1F,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAQtD,wBAAsB,WAAW,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,CAmH/F"}
1
+ {"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/tools/cache.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAiC,MAAM,aAAa,CAAC;AAC1F,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAQtD,wBAAsB,WAAW,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,CAqH/F"}
@@ -102,12 +102,14 @@ async function handleCache(input, router) {
102
102
  query: input.query,
103
103
  urlPattern: input.url_pattern,
104
104
  since: input.since,
105
- mode: input.mode
105
+ mode: input.mode,
106
+ limit: input.limit
106
107
  });
107
108
  const results = searchCacheFiltered({
108
109
  query: input.query,
109
110
  urlPattern: input.url_pattern,
110
- since: input.since
111
+ since: input.since,
112
+ limit: input.limit
111
113
  });
112
114
  return {
113
115
  results: results.map((r) => ({