@staticn0va/wigolo 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +142 -345
  2. package/dist/agent/pipeline.d.ts.map +1 -1
  3. package/dist/agent/pipeline.js +35 -5
  4. package/dist/agent/pipeline.js.map +1 -1
  5. package/dist/cache/store.d.ts +1 -0
  6. package/dist/cache/store.d.ts.map +1 -1
  7. package/dist/cache/store.js +4 -2
  8. package/dist/cache/store.js.map +1 -1
  9. package/dist/cli/doctor.d.ts.map +1 -1
  10. package/dist/cli/doctor.js +43 -17
  11. package/dist/cli/doctor.js.map +1 -1
  12. package/dist/cli/shutdown.d.ts +2 -0
  13. package/dist/cli/shutdown.d.ts.map +1 -0
  14. package/dist/cli/shutdown.js +26 -0
  15. package/dist/cli/shutdown.js.map +1 -0
  16. package/dist/extraction/v1/local-llm.d.ts.map +1 -1
  17. package/dist/extraction/v1/local-llm.js +13 -37
  18. package/dist/extraction/v1/local-llm.js.map +1 -1
  19. package/dist/fetch/error-describe.d.ts +7 -0
  20. package/dist/fetch/error-describe.d.ts.map +1 -0
  21. package/dist/fetch/error-describe.js +37 -0
  22. package/dist/fetch/error-describe.js.map +1 -0
  23. package/dist/fetch/router.d.ts.map +1 -1
  24. package/dist/fetch/router.js +4 -2
  25. package/dist/fetch/router.js.map +1 -1
  26. package/dist/index.js +17 -12
  27. package/dist/index.js.map +1 -1
  28. package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
  29. package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
  30. package/dist/integrations/cloud/llm/model-select.js +32 -0
  31. package/dist/integrations/cloud/llm/model-select.js.map +1 -0
  32. package/dist/integrations/cloud/llm/run.d.ts +27 -0
  33. package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
  34. package/dist/integrations/cloud/llm/run.js +99 -0
  35. package/dist/integrations/cloud/llm/run.js.map +1 -0
  36. package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
  37. package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
  38. package/dist/integrations/cloud/llm/text-adapters.js +103 -0
  39. package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
  40. package/dist/providers/rerank-provider.d.ts +1 -0
  41. package/dist/providers/rerank-provider.d.ts.map +1 -1
  42. package/dist/providers/rerank-provider.js +13 -0
  43. package/dist/providers/rerank-provider.js.map +1 -1
  44. package/dist/research/brief.d.ts +1 -0
  45. package/dist/research/brief.d.ts.map +1 -1
  46. package/dist/research/brief.js +8 -4
  47. package/dist/research/brief.js.map +1 -1
  48. package/dist/research/pipeline.js +1 -1
  49. package/dist/research/pipeline.js.map +1 -1
  50. package/dist/research/synthesis-local.d.ts +3 -0
  51. package/dist/research/synthesis-local.d.ts.map +1 -1
  52. package/dist/research/synthesis-local.js +18 -29
  53. package/dist/research/synthesis-local.js.map +1 -1
  54. package/dist/search/filters.d.ts.map +1 -1
  55. package/dist/search/filters.js +11 -1
  56. package/dist/search/filters.js.map +1 -1
  57. package/dist/search/reranker/transformers-rerank-provider.d.ts +1 -0
  58. package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -1
  59. package/dist/search/reranker/transformers-rerank-provider.js +16 -0
  60. package/dist/search/reranker/transformers-rerank-provider.js.map +1 -1
  61. package/dist/tools/cache.d.ts.map +1 -1
  62. package/dist/tools/cache.js +4 -2
  63. package/dist/tools/cache.js.map +1 -1
  64. package/dist/tools/fetch.d.ts.map +1 -1
  65. package/dist/tools/fetch.js +17 -4
  66. package/dist/tools/fetch.js.map +1 -1
  67. package/package.json +1 -1
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/agent/pipeline.ts"],"sourcesContent":["import { createLogger } from '../logger.js';\nimport { planExecution } from './planner.js';\nimport { executeAgentPlan } from './executor.js';\nimport { extractWithSchema } from '../extraction/schema.js';\nimport {\n type SamplingCapableServer,\n requestSampling,\n checkSamplingSupport,\n} from '../search/sampling.js';\nimport type {\n AgentInput,\n AgentOutput,\n AgentSource,\n AgentStep,\n SearchEngine,\n} from '../types.js';\nimport type { SmartRouter } from '../fetch/router.js';\nimport type { JsonSchema } from '../extraction/schema.js';\n\nconst log = createLogger('agent');\n\nconst DEFAULT_MAX_PAGES = 10;\nconst DEFAULT_MAX_TIME_MS = 60000;\n\nexport async function runAgentPipeline(\n input: AgentInput,\n engines: SearchEngine[],\n router: SmartRouter,\n server?: SamplingCapableServer,\n): Promise<AgentOutput> {\n const start = Date.now();\n const maxPages = input.max_pages ?? DEFAULT_MAX_PAGES;\n const maxTimeMs = input.max_time_ms ?? DEFAULT_MAX_TIME_MS;\n const deadlineMs = start + maxTimeMs;\n const steps: AgentStep[] = [];\n\n try {\n const planStart = Date.now();\n log.info('agent pipeline started', { prompt: input.prompt.slice(0, 100), maxPages, maxTimeMs });\n\n const plan = await planExecution(input.prompt, input.urls, server);\n\n steps.push({\n action: 'plan',\n detail: `Generated ${plan.searches.length} searches, ${plan.urls.length} URLs${plan.samplingUsed ? ' (via sampling)' : ' (keyword extraction)'}`,\n time_ms: Date.now() - planStart,\n });\n\n log.info('plan generated', {\n searches: plan.searches.length,\n urls: plan.urls.length,\n samplingUsed: plan.samplingUsed,\n });\n\n const execResult = await executeAgentPlan(plan, engines, router, {\n maxPages,\n deadlineMs,\n }, input.prompt);\n\n steps.push(...execResult.steps);\n\n const sources = execResult.sources;\n const pagesFetched = sources.filter((s) => s.fetched).length;\n\n if (input.schema && sources.some((s) => s.fetched)) {\n const extractStart = Date.now();\n const schemaResult = applySchemaExtraction(sources, input.schema as JsonSchema);\n\n steps.push({\n action: 'extract',\n detail: `Applied schema extraction to ${sources.filter((s) => s.fetched).length} sources`,\n time_ms: Date.now() - extractStart,\n });\n\n if (schemaResult) {\n return {\n result: schemaResult,\n sources,\n pages_fetched: pagesFetched,\n steps,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n };\n }\n }\n\n const synthStart = Date.now();\n const result = await synthesizeResult(input.prompt, sources, server);\n\n steps.push({\n action: 'synthesize',\n detail: `Produced ${typeof result === 'string' ? result.length : JSON.stringify(result).length} char result${server ? ' (via sampling)' : ''}`,\n time_ms: Date.now() - synthStart,\n });\n\n return {\n result,\n sources,\n pages_fetched: pagesFetched,\n steps,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n };\n } catch (err) {\n log.error('agent pipeline failed', {\n prompt: input.prompt.slice(0, 100),\n error: err instanceof Error ? err.message : String(err),\n });\n return {\n result: '',\n sources: [],\n pages_fetched: 0,\n steps,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n\nfunction applySchemaExtraction(\n sources: AgentSource[],\n schema: JsonSchema,\n): Record<string, unknown> | null {\n try {\n const fetchedSources = sources.filter((s) => s.fetched && s.markdown_content.length > 0);\n if (fetchedSources.length === 0) return null;\n\n const mergedData: Record<string, unknown> = {};\n\n for (const source of fetchedSources) {\n try {\n const html = `<html><body>${source.markdown_content}</body></html>`;\n const extracted = extractWithSchema(html, schema);\n\n for (const [key, value] of Object.entries(extracted)) {\n if (value !== undefined && value !== null && value !== '') {\n if (!(key in mergedData)) {\n mergedData[key] = value;\n }\n }\n }\n } catch (err) {\n log.debug('schema extraction failed for source', {\n url: source.url,\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n return Object.keys(mergedData).length > 0 ? mergedData : null;\n } catch (err) {\n log.warn('schema extraction phase failed', {\n error: err instanceof Error ? err.message : String(err),\n });\n return null;\n }\n}\n\nasync function synthesizeResult(\n prompt: string,\n sources: AgentSource[],\n server?: SamplingCapableServer,\n): Promise<string> {\n const fetchedSources = sources.filter((s) => s.fetched && s.markdown_content.length > 0);\n\n if (fetchedSources.length === 0) {\n return 'No data could be gathered for this request.';\n }\n\n if (server) {\n try {\n const result = await synthesizeWithSampling(prompt, fetchedSources, server);\n if (result) return result;\n } catch (err) {\n log.warn('sampling synthesis failed, using fallback', {\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n return buildFallbackSynthesis(prompt, fetchedSources);\n}\n\nasync function synthesizeWithSampling(\n prompt: string,\n sources: AgentSource[],\n server: SamplingCapableServer,\n): Promise<string | null> {\n try {\n const maxCharsPerSource = 3000;\n const sourceBlocks = sources.map((s, i) => {\n const content = s.markdown_content.slice(0, maxCharsPerSource);\n return `[${i + 1}] ${s.title} (${s.url})\\n${content}`;\n });\n\n const totalSourceText = sourceBlocks.join('\\n\\n');\n const truncatedSourceText = totalSourceText.slice(0, 40000);\n\n const samplingPrompt = `You are a data gathering assistant. Based on the user's request and the gathered sources, synthesize a comprehensive result.\n\nUser's request: ${prompt}\n\nGathered sources:\n${truncatedSourceText}\n\nProvide a clear, well-organized response that addresses the user's request based on the gathered data. Include source references [1], [2], etc.`;\n\n if (!checkSamplingSupport(server)) {\n log.debug('client does not support sampling for synthesis');\n return null;\n }\n\n const response = await requestSampling(\n server,\n [{ role: 'user', content: { type: 'text', text: samplingPrompt } }],\n 2000,\n );\n\n if (response?.content?.text && response.content.text.trim().length > 0) {\n return response.content.text.trim();\n }\n\n return null;\n } catch (err) {\n log.debug('sampling synthesis failed', {\n error: err instanceof Error ? err.message : String(err),\n });\n return null;\n }\n}\n\nfunction buildFallbackSynthesis(prompt: string, sources: AgentSource[]): string {\n const header = `## Results: ${prompt}\\n\\nGathered from ${sources.length} source(s):\\n\\n`;\n let result = header;\n const maxTotal = 6000;\n let remaining = maxTotal - header.length;\n\n for (let i = 0; i < sources.length && remaining > 0; i++) {\n const source = sources[i];\n const sourceHeader = `### [${i + 1}] ${source.title}\\n**URL:** ${source.url}\\n\\n`;\n\n if (remaining < sourceHeader.length + 20) break;\n\n result += sourceHeader;\n remaining -= sourceHeader.length;\n\n const contentBudget = Math.min(remaining - 10, source.markdown_content.length, 1500);\n if (contentBudget > 0) {\n let content = source.markdown_content.slice(0, contentBudget);\n if (content.length < source.markdown_content.length) {\n content = content.slice(0, Math.max(contentBudget - 3, 0)) + '...';\n }\n result += content + '\\n\\n';\n remaining -= content.length + 2;\n }\n }\n\n return result.trimEnd();\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,qBAAqB;AAC9B,SAAS,wBAAwB;AACjC,SAAS,yBAAyB;AAClC;AAAA,EAEE;AAAA,EACA;AAAA,OACK;AAWP,MAAM,MAAM,aAAa,OAAO;AAEhC,MAAM,oBAAoB;AAC1B,MAAM,sBAAsB;AAE5B,eAAsB,iBACpB,OACA,SACA,QACA,QACsB;AACtB,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,WAAW,MAAM,aAAa;AACpC,QAAM,YAAY,MAAM,eAAe;AACvC,QAAM,aAAa,QAAQ;AAC3B,QAAM,QAAqB,CAAC;AAE5B,MAAI;AACF,UAAM,YAAY,KAAK,IAAI;AAC3B,QAAI,KAAK,0BAA0B,EAAE,QAAQ,MAAM,OAAO,MAAM,GAAG,GAAG,GAAG,UAAU,UAAU,CAAC;AAE9F,UAAM,OAAO,MAAM,cAAc,MAAM,QAAQ,MAAM,MAAM,MAAM;AAEjE,UAAM,KAAK;AAAA,MACT,QAAQ;AAAA,MACR,QAAQ,aAAa,KAAK,SAAS,MAAM,cAAc,KAAK,KAAK,MAAM,QAAQ,KAAK,eAAe,oBAAoB,uBAAuB;AAAA,MAC9I,SAAS,KAAK,IAAI,IAAI;AAAA,IACxB,CAAC;AAED,QAAI,KAAK,kBAAkB;AAAA,MACzB,UAAU,KAAK,SAAS;AAAA,MACxB,MAAM,KAAK,KAAK;AAAA,MAChB,cAAc,KAAK;AAAA,IACrB,CAAC;AAED,UAAM,aAAa,MAAM,iBAAiB,MAAM,SAAS,QAAQ;AAAA,MAC/D;AAAA,MACA;AAAA,IACF,GAAG,MAAM,MAAM;AAEf,UAAM,KAAK,GAAG,WAAW,KAAK;AAE9B,UAAM,UAAU,WAAW;AAC3B,UAAM,eAAe,QAAQ,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE;AAEtD,QAAI,MAAM,UAAU,QAAQ,KAAK,CAAC,MAAM,EAAE,OAAO,GAAG;AAClD,YAAM,eAAe,KAAK,IAAI;AAC9B,YAAM,eAAe,sBAAsB,SAAS,MAAM,MAAoB;AAE9E,YAAM,KAAK;AAAA,QACT,QAAQ;AAAA,QACR,QAAQ,gCAAgC,QAAQ,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM;AAAA,QAC/E,SAAS,KAAK,IAAI,IAAI;AAAA,MACxB,CAAC;AAED,UAAI,cAAc;AAChB,eAAO;AAAA,UACL,QAAQ;AAAA,UACR;AAAA,UACA,eAAe;AAAA,UACf;AAAA,UACA,eAAe,KAAK,IAAI,IAAI;AAAA,UAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,QAC7D;AAAA,MACF;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,IAAI;AAC5B,UAAM,SAAS,MAAM,iBAAiB,MAAM,QAAQ,SAAS,MAAM;AAEnE,UAAM,KAAK;AAAA,MACT,QAAQ;AAAA,MACR,QAAQ,YAAY,OAAO,WAAW,WAAW,OAAO,SAAS,KAAK,UAAU,MAAM,EAAE,MAAM,eAAe,SAAS,oBAAoB,EAAE;AAAA,MAC5I,SAAS,KAAK,IAAI,IAAI;AAAA,IACxB,CAAC;AAED,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,eAAe;AAAA,MACf;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,IAC7D;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,MAAM,yBAAyB;AAAA,MACjC,QAAQ,MAAM,OAAO,MAAM,GAAG,GAAG;AAAA,MACjC,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,SAAS,CAAC;AAAA,MACV,eAAe;AAAA,MACf;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC3D,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAEA,SAAS,sBACP,SACA,QACgC;AAChC,MAAI;AACF,UAAM,iBAAiB,QAAQ,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC;AACvF,QAAI,eAAe,WAAW,EAAG,QAAO;AAExC,UAAM,aAAsC,CAAC;AAE7C,eAAW,UAAU,gBAAgB;AACnC,UAAI;AACF,cAAM,OAAO,eAAe,OAAO,gBAAgB;AACnD,cAAM,YAAY,kBAAkB,MAAM,MAAM;AAEhD,mBAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,SAAS,GAAG;AACpD,cAAI,UAAU,UAAa,UAAU,QAAQ,UAAU,IAAI;AACzD,gBAAI,EAAE,OAAO,aAAa;AACxB,yBAAW,GAAG,IAAI;AAAA,YACpB;AAAA,UACF;AAAA,QACF;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,MAAM,uCAAuC;AAAA,UAC/C,KAAK,OAAO;AAAA,UACZ,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QACxD,CAAC;AAAA,MACH;AAAA,IACF;AAEA,WAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAAA,EAC3D,SAAS,KAAK;AACZ,QAAI,KAAK,kCAAkC;AAAA,MACzC,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,EACT;AACF;AAEA,eAAe,iBACb,QACA,SACA,QACiB;AACjB,QAAM,iBAAiB,QAAQ,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC;AAEvF,MAAI,eAAe,WAAW,GAAG;AAC/B,WAAO;AAAA,EACT;AAEA,MAAI,QAAQ;AACV,QAAI;AACF,YAAM,SAAS,MAAM,uBAAuB,QAAQ,gBAAgB,MAAM;AAC1E,UAAI,OAAQ,QAAO;AAAA,IACrB,SAAS,KAAK;AACZ,UAAI,KAAK,6CAA6C;AAAA,QACpD,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACxD,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO,uBAAuB,QAAQ,cAAc;AACtD;AAEA,eAAe,uBACb,QACA,SACA,QACwB;AACxB,MAAI;AACF,UAAM,oBAAoB;AAC1B,UAAM,eAAe,QAAQ,IAAI,CAAC,GAAG,MAAM;AACzC,YAAM,UAAU,EAAE,iBAAiB,MAAM,GAAG,iBAAiB;AAC7D,aAAO,IAAI,IAAI,CAAC,KAAK,EAAE,KAAK,KAAK,EAAE,GAAG;AAAA,EAAM,OAAO;AAAA,IACrD,CAAC;AAED,UAAM,kBAAkB,aAAa,KAAK,MAAM;AAChD,UAAM,sBAAsB,gBAAgB,MAAM,GAAG,GAAK;AAE1D,UAAM,iBAAiB;AAAA;AAAA,kBAET,MAAM;AAAA;AAAA;AAAA,EAGtB,mBAAmB;AAAA;AAAA;AAIjB,QAAI,CAAC,qBAAqB,MAAM,GAAG;AACjC,UAAI,MAAM,gDAAgD;AAC1D,aAAO;AAAA,IACT;AAEA,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA,CAAC,EAAE,MAAM,QAAQ,SAAS,EAAE,MAAM,QAAQ,MAAM,eAAe,EAAE,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,QAAI,UAAU,SAAS,QAAQ,SAAS,QAAQ,KAAK,KAAK,EAAE,SAAS,GAAG;AACtE,aAAO,SAAS,QAAQ,KAAK,KAAK;AAAA,IACpC;AAEA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,QAAI,MAAM,6BAA6B;AAAA,MACrC,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,EACT;AACF;AAEA,SAAS,uBAAuB,QAAgB,SAAgC;AAC9E,QAAM,SAAS,eAAe,MAAM;AAAA;AAAA,gBAAqB,QAAQ,MAAM;AAAA;AAAA;AACvE,MAAI,SAAS;AACb,QAAM,WAAW;AACjB,MAAI,YAAY,WAAW,OAAO;AAElC,WAAS,IAAI,GAAG,IAAI,QAAQ,UAAU,YAAY,GAAG,KAAK;AACxD,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,eAAe,QAAQ,IAAI,CAAC,KAAK,OAAO,KAAK;AAAA,WAAc,OAAO,GAAG;AAAA;AAAA;AAE3E,QAAI,YAAY,aAAa,SAAS,GAAI;AAE1C,cAAU;AACV,iBAAa,aAAa;AAE1B,UAAM,gBAAgB,KAAK,IAAI,YAAY,IAAI,OAAO,iBAAiB,QAAQ,IAAI;AACnF,QAAI,gBAAgB,GAAG;AACrB,UAAI,UAAU,OAAO,iBAAiB,MAAM,GAAG,aAAa;AAC5D,UAAI,QAAQ,SAAS,OAAO,iBAAiB,QAAQ;AACnD,kBAAU,QAAQ,MAAM,GAAG,KAAK,IAAI,gBAAgB,GAAG,CAAC,CAAC,IAAI;AAAA,MAC/D;AACA,gBAAU,UAAU;AACpB,mBAAa,QAAQ,SAAS;AAAA,IAChC;AAAA,EACF;AAEA,SAAO,OAAO,QAAQ;AACxB;","names":[]}
1
+ {"version":3,"sources":["../../src/agent/pipeline.ts"],"sourcesContent":["import { createLogger } from '../logger.js';\nimport { planExecution } from './planner.js';\nimport { executeAgentPlan } from './executor.js';\nimport { extractWithSchema } from '../extraction/schema.js';\nimport {\n type SamplingCapableServer,\n requestSampling,\n checkSamplingSupport,\n} from '../search/sampling.js';\nimport { isLlmConfigured, runLlmText } from '../integrations/cloud/llm/run.js';\nimport type {\n AgentInput,\n AgentOutput,\n AgentSource,\n AgentStep,\n SearchEngine,\n} from '../types.js';\nimport type { SmartRouter } from '../fetch/router.js';\nimport type { JsonSchema } from '../extraction/schema.js';\n\nconst log = createLogger('agent');\n\nconst DEFAULT_MAX_PAGES = 10;\nconst DEFAULT_MAX_TIME_MS = 60000;\n\nexport async function runAgentPipeline(\n input: AgentInput,\n engines: SearchEngine[],\n router: SmartRouter,\n server?: SamplingCapableServer,\n): Promise<AgentOutput> {\n const start = Date.now();\n const maxPages = input.max_pages ?? DEFAULT_MAX_PAGES;\n const maxTimeMs = input.max_time_ms ?? DEFAULT_MAX_TIME_MS;\n const deadlineMs = start + maxTimeMs;\n const steps: AgentStep[] = [];\n\n try {\n const planStart = Date.now();\n log.info('agent pipeline started', { prompt: input.prompt.slice(0, 100), maxPages, maxTimeMs });\n\n const plan = await planExecution(input.prompt, input.urls, server);\n\n steps.push({\n action: 'plan',\n detail: `Generated ${plan.searches.length} searches, ${plan.urls.length} URLs${plan.samplingUsed ? ' (via sampling)' : ' (keyword extraction)'}`,\n time_ms: Date.now() - planStart,\n });\n\n log.info('plan generated', {\n searches: plan.searches.length,\n urls: plan.urls.length,\n samplingUsed: plan.samplingUsed,\n });\n\n const execResult = await executeAgentPlan(plan, engines, router, {\n maxPages,\n deadlineMs,\n }, input.prompt);\n\n steps.push(...execResult.steps);\n\n const sources = execResult.sources;\n const pagesFetched = sources.filter((s) => s.fetched).length;\n\n if (input.schema && sources.some((s) => s.fetched)) {\n const extractStart = Date.now();\n const schemaResult = applySchemaExtraction(sources, input.schema as JsonSchema);\n\n steps.push({\n action: 'extract',\n detail: `Applied schema extraction to ${sources.filter((s) => s.fetched).length} sources`,\n time_ms: Date.now() - extractStart,\n });\n\n if (schemaResult) {\n return {\n result: schemaResult,\n sources,\n pages_fetched: pagesFetched,\n steps,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n };\n }\n }\n\n const synthStart = Date.now();\n const { result, samplingUsed, llmUsed } = await synthesizeResult(input.prompt, sources, server);\n\n const resultLen = typeof result === 'string' ? result.length : JSON.stringify(result).length;\n const synthPath = samplingUsed\n ? ' (via sampling)'\n : llmUsed\n ? ' (via configured LLM)'\n : ' (evidence fallback)';\n steps.push({\n action: 'synthesize',\n detail: `Produced ${resultLen} char result${synthPath}`,\n time_ms: Date.now() - synthStart,\n });\n\n return {\n result,\n sources,\n pages_fetched: pagesFetched,\n steps,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n };\n } catch (err) {\n log.error('agent pipeline failed', {\n prompt: input.prompt.slice(0, 100),\n error: err instanceof Error ? err.message : String(err),\n });\n return {\n result: '',\n sources: [],\n pages_fetched: 0,\n steps,\n total_time_ms: Date.now() - start,\n sampling_supported: !!server && checkSamplingSupport(server),\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n\nfunction applySchemaExtraction(\n sources: AgentSource[],\n schema: JsonSchema,\n): Record<string, unknown> | null {\n try {\n const fetchedSources = sources.filter((s) => s.fetched && s.markdown_content.length > 0);\n if (fetchedSources.length === 0) return null;\n\n const mergedData: Record<string, unknown> = {};\n\n for (const source of fetchedSources) {\n try {\n const html = `<html><body>${source.markdown_content}</body></html>`;\n const extracted = extractWithSchema(html, schema);\n\n for (const [key, value] of Object.entries(extracted)) {\n if (value !== undefined && value !== null && value !== '') {\n if (!(key in mergedData)) {\n mergedData[key] = value;\n }\n }\n }\n } catch (err) {\n log.debug('schema extraction failed for source', {\n url: source.url,\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n return Object.keys(mergedData).length > 0 ? mergedData : null;\n } catch (err) {\n log.warn('schema extraction phase failed', {\n error: err instanceof Error ? err.message : String(err),\n });\n return null;\n }\n}\n\nasync function synthesizeResult(\n prompt: string,\n sources: AgentSource[],\n server?: SamplingCapableServer,\n): Promise<{ result: string; samplingUsed: boolean; llmUsed?: boolean }> {\n const fetchedSources = sources.filter((s) => s.fetched && s.markdown_content.length > 0);\n\n if (fetchedSources.length === 0) {\n return { result: 'No data could be gathered for this request.', samplingUsed: false };\n }\n\n if (server) {\n try {\n const result = await synthesizeWithSampling(prompt, fetchedSources, server);\n if (result) return { result, samplingUsed: true };\n } catch (err) {\n log.warn('sampling synthesis failed, using fallback', {\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n // Second fallback: configured WIGOLO_LLM_PROVIDER drives synthesis when the\n // host MCP did not provide sampling. Only the evidence-dump path remains\n // when nothing is configured.\n if (isLlmConfigured()) {\n try {\n const result = await synthesizeViaLlmRunner(prompt, fetchedSources);\n if (result) return { result, samplingUsed: false, llmUsed: true };\n } catch (err) {\n log.warn('llm runner synthesis failed, using evidence fallback', {\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n return { result: buildFallbackSynthesis(prompt, fetchedSources), samplingUsed: false };\n}\n\nasync function synthesizeViaLlmRunner(\n prompt: string,\n sources: AgentSource[],\n): Promise<string | null> {\n const maxCharsPerSource = 3000;\n const sourceBlocks = sources.map((s, i) => {\n const content = s.markdown_content.slice(0, maxCharsPerSource);\n return `[${i + 1}] ${s.title} (${s.url})\\n${content}`;\n });\n const truncated = sourceBlocks.join('\\n\\n').slice(0, 40000);\n const fullPrompt =\n 'You are a data gathering assistant. Based on the user request and the gathered sources, ' +\n 'synthesize a clear, well-organized response. Cite sources as [1], [2], etc.\\n\\n' +\n `User request: ${prompt}\\n\\n` +\n `Sources:\\n${truncated}`;\n const r = await runLlmText({ prompt: fullPrompt, maxTokens: 2000 });\n return r.text && r.text.trim().length > 0 ? r.text.trim() : null;\n}\n\nasync function synthesizeWithSampling(\n prompt: string,\n sources: AgentSource[],\n server: SamplingCapableServer,\n): Promise<string | null> {\n try {\n const maxCharsPerSource = 3000;\n const sourceBlocks = sources.map((s, i) => {\n const content = s.markdown_content.slice(0, maxCharsPerSource);\n return `[${i + 1}] ${s.title} (${s.url})\\n${content}`;\n });\n\n const totalSourceText = sourceBlocks.join('\\n\\n');\n const truncatedSourceText = totalSourceText.slice(0, 40000);\n\n const samplingPrompt = `You are a data gathering assistant. Based on the user's request and the gathered sources, synthesize a comprehensive result.\n\nUser's request: ${prompt}\n\nGathered sources:\n${truncatedSourceText}\n\nProvide a clear, well-organized response that addresses the user's request based on the gathered data. Include source references [1], [2], etc.`;\n\n if (!checkSamplingSupport(server)) {\n log.debug('client does not support sampling for synthesis');\n return null;\n }\n\n const response = await requestSampling(\n server,\n [{ role: 'user', content: { type: 'text', text: samplingPrompt } }],\n 2000,\n );\n\n if (response?.content?.text && response.content.text.trim().length > 0) {\n return response.content.text.trim();\n }\n\n return null;\n } catch (err) {\n log.debug('sampling synthesis failed', {\n error: err instanceof Error ? err.message : String(err),\n });\n return null;\n }\n}\n\nfunction buildFallbackSynthesis(prompt: string, sources: AgentSource[]): string {\n const header = `## Results: ${prompt}\\n\\nGathered from ${sources.length} source(s):\\n\\n`;\n let result = header;\n const maxTotal = 6000;\n let remaining = maxTotal - header.length;\n\n for (let i = 0; i < sources.length && remaining > 0; i++) {\n const source = sources[i];\n const sourceHeader = `### [${i + 1}] ${source.title}\\n**URL:** ${source.url}\\n\\n`;\n\n if (remaining < sourceHeader.length + 20) break;\n\n result += sourceHeader;\n remaining -= sourceHeader.length;\n\n const contentBudget = Math.min(remaining - 10, source.markdown_content.length, 1500);\n if (contentBudget > 0) {\n let content = source.markdown_content.slice(0, contentBudget);\n if (content.length < source.markdown_content.length) {\n content = content.slice(0, Math.max(contentBudget - 3, 0)) + '...';\n }\n result += content + '\\n\\n';\n remaining -= content.length + 2;\n }\n }\n\n return result.trimEnd();\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,qBAAqB;AAC9B,SAAS,wBAAwB;AACjC,SAAS,yBAAyB;AAClC;AAAA,EAEE;AAAA,EACA;AAAA,OACK;AACP,SAAS,iBAAiB,kBAAkB;AAW5C,MAAM,MAAM,aAAa,OAAO;AAEhC,MAAM,oBAAoB;AAC1B,MAAM,sBAAsB;AAE5B,eAAsB,iBACpB,OACA,SACA,QACA,QACsB;AACtB,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,WAAW,MAAM,aAAa;AACpC,QAAM,YAAY,MAAM,eAAe;AACvC,QAAM,aAAa,QAAQ;AAC3B,QAAM,QAAqB,CAAC;AAE5B,MAAI;AACF,UAAM,YAAY,KAAK,IAAI;AAC3B,QAAI,KAAK,0BAA0B,EAAE,QAAQ,MAAM,OAAO,MAAM,GAAG,GAAG,GAAG,UAAU,UAAU,CAAC;AAE9F,UAAM,OAAO,MAAM,cAAc,MAAM,QAAQ,MAAM,MAAM,MAAM;AAEjE,UAAM,KAAK;AAAA,MACT,QAAQ;AAAA,MACR,QAAQ,aAAa,KAAK,SAAS,MAAM,cAAc,KAAK,KAAK,MAAM,QAAQ,KAAK,eAAe,oBAAoB,uBAAuB;AAAA,MAC9I,SAAS,KAAK,IAAI,IAAI;AAAA,IACxB,CAAC;AAED,QAAI,KAAK,kBAAkB;AAAA,MACzB,UAAU,KAAK,SAAS;AAAA,MACxB,MAAM,KAAK,KAAK;AAAA,MAChB,cAAc,KAAK;AAAA,IACrB,CAAC;AAED,UAAM,aAAa,MAAM,iBAAiB,MAAM,SAAS,QAAQ;AAAA,MAC/D;AAAA,MACA;AAAA,IACF,GAAG,MAAM,MAAM;AAEf,UAAM,KAAK,GAAG,WAAW,KAAK;AAE9B,UAAM,UAAU,WAAW;AAC3B,UAAM,eAAe,QAAQ,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE;AAEtD,QAAI,MAAM,UAAU,QAAQ,KAAK,CAAC,MAAM,EAAE,OAAO,GAAG;AAClD,YAAM,eAAe,KAAK,IAAI;AAC9B,YAAM,eAAe,sBAAsB,SAAS,MAAM,MAAoB;AAE9E,YAAM,KAAK;AAAA,QACT,QAAQ;AAAA,QACR,QAAQ,gCAAgC,QAAQ,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM;AAAA,QAC/E,SAAS,KAAK,IAAI,IAAI;AAAA,MACxB,CAAC;AAED,UAAI,cAAc;AAChB,eAAO;AAAA,UACL,QAAQ;AAAA,UACR;AAAA,UACA,eAAe;AAAA,UACf;AAAA,UACA,eAAe,KAAK,IAAI,IAAI;AAAA,UAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,QAC7D;AAAA,MACF;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,IAAI;AAC5B,UAAM,EAAE,QAAQ,cAAc,QAAQ,IAAI,MAAM,iBAAiB,MAAM,QAAQ,SAAS,MAAM;AAE9F,UAAM,YAAY,OAAO,WAAW,WAAW,OAAO,SAAS,KAAK,UAAU,MAAM,EAAE;AACtF,UAAM,YAAY,eACd,oBACA,UACE,0BACA;AACN,UAAM,KAAK;AAAA,MACT,QAAQ;AAAA,MACR,QAAQ,YAAY,SAAS,eAAe,SAAS;AAAA,MACrD,SAAS,KAAK,IAAI,IAAI;AAAA,IACxB,CAAC;AAED,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,eAAe;AAAA,MACf;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,IAC7D;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,MAAM,yBAAyB;AAAA,MACjC,QAAQ,MAAM,OAAO,MAAM,GAAG,GAAG;AAAA,MACjC,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,SAAS,CAAC;AAAA,MACV,eAAe;AAAA,MACf;AAAA,MACA,eAAe,KAAK,IAAI,IAAI;AAAA,MAC5B,oBAAoB,CAAC,CAAC,UAAU,qBAAqB,MAAM;AAAA,MAC3D,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAEA,SAAS,sBACP,SACA,QACgC;AAChC,MAAI;AACF,UAAM,iBAAiB,QAAQ,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC;AACvF,QAAI,eAAe,WAAW,EAAG,QAAO;AAExC,UAAM,aAAsC,CAAC;AAE7C,eAAW,UAAU,gBAAgB;AACnC,UAAI;AACF,cAAM,OAAO,eAAe,OAAO,gBAAgB;AACnD,cAAM,YAAY,kBAAkB,MAAM,MAAM;AAEhD,mBAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,SAAS,GAAG;AACpD,cAAI,UAAU,UAAa,UAAU,QAAQ,UAAU,IAAI;AACzD,gBAAI,EAAE,OAAO,aAAa;AACxB,yBAAW,GAAG,IAAI;AAAA,YACpB;AAAA,UACF;AAAA,QACF;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,MAAM,uCAAuC;AAAA,UAC/C,KAAK,OAAO;AAAA,UACZ,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QACxD,CAAC;AAAA,MACH;AAAA,IACF;AAEA,WAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAAA,EAC3D,SAAS,KAAK;AACZ,QAAI,KAAK,kCAAkC;AAAA,MACzC,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,EACT;AACF;AAEA,eAAe,iBACb,QACA,SACA,QACuE;AACvE,QAAM,iBAAiB,QAAQ,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,iBAAiB,SAAS,CAAC;AAEvF,MAAI,eAAe,WAAW,GAAG;AAC/B,WAAO,EAAE,QAAQ,+CAA+C,cAAc,MAAM;AAAA,EACtF;AAEA,MAAI,QAAQ;AACV,QAAI;AACF,YAAM,SAAS,MAAM,uBAAuB,QAAQ,gBAAgB,MAAM;AAC1E,UAAI,OAAQ,QAAO,EAAE,QAAQ,cAAc,KAAK;AAAA,IAClD,SAAS,KAAK;AACZ,UAAI,KAAK,6CAA6C;AAAA,QACpD,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACxD,CAAC;AAAA,IACH;AAAA,EACF;AAKA,MAAI,gBAAgB,GAAG;AACrB,QAAI;AACF,YAAM,SAAS,MAAM,uBAAuB,QAAQ,cAAc;AAClE,UAAI,OAAQ,QAAO,EAAE,QAAQ,cAAc,OAAO,SAAS,KAAK;AAAA,IAClE,SAAS,KAAK;AACZ,UAAI,KAAK,wDAAwD;AAAA,QAC/D,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACxD,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO,EAAE,QAAQ,uBAAuB,QAAQ,cAAc,GAAG,cAAc,MAAM;AACvF;AAEA,eAAe,uBACb,QACA,SACwB;AACxB,QAAM,oBAAoB;AAC1B,QAAM,eAAe,QAAQ,IAAI,CAAC,GAAG,MAAM;AACzC,UAAM,UAAU,EAAE,iBAAiB,MAAM,GAAG,iBAAiB;AAC7D,WAAO,IAAI,IAAI,CAAC,KAAK,EAAE,KAAK,KAAK,EAAE,GAAG;AAAA,EAAM,OAAO;AAAA,EACrD,CAAC;AACD,QAAM,YAAY,aAAa,KAAK,MAAM,EAAE,MAAM,GAAG,GAAK;AAC1D,QAAM,aACJ;AAAA;AAAA,gBAEiB,MAAM;AAAA;AAAA;AAAA,EACV,SAAS;AACxB,QAAM,IAAI,MAAM,WAAW,EAAE,QAAQ,YAAY,WAAW,IAAK,CAAC;AAClE,SAAO,EAAE,QAAQ,EAAE,KAAK,KAAK,EAAE,SAAS,IAAI,EAAE,KAAK,KAAK,IAAI;AAC9D;AAEA,eAAe,uBACb,QACA,SACA,QACwB;AACxB,MAAI;AACF,UAAM,oBAAoB;AAC1B,UAAM,eAAe,QAAQ,IAAI,CAAC,GAAG,MAAM;AACzC,YAAM,UAAU,EAAE,iBAAiB,MAAM,GAAG,iBAAiB;AAC7D,aAAO,IAAI,IAAI,CAAC,KAAK,EAAE,KAAK,KAAK,EAAE,GAAG;AAAA,EAAM,OAAO;AAAA,IACrD,CAAC;AAED,UAAM,kBAAkB,aAAa,KAAK,MAAM;AAChD,UAAM,sBAAsB,gBAAgB,MAAM,GAAG,GAAK;AAE1D,UAAM,iBAAiB;AAAA;AAAA,kBAET,MAAM;AAAA;AAAA;AAAA,EAGtB,mBAAmB;AAAA;AAAA;AAIjB,QAAI,CAAC,qBAAqB,MAAM,GAAG;AACjC,UAAI,MAAM,gDAAgD;AAC1D,aAAO;AAAA,IACT;AAEA,UAAM,WAAW,MAAM;AAAA,MACrB;AAAA,MACA,CAAC,EAAE,MAAM,QAAQ,SAAS,EAAE,MAAM,QAAQ,MAAM,eAAe,EAAE,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,QAAI,UAAU,SAAS,QAAQ,SAAS,QAAQ,KAAK,KAAK,EAAE,SAAS,GAAG;AACtE,aAAO,SAAS,QAAQ,KAAK,KAAK;AAAA,IACpC;AAEA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,QAAI,MAAM,6BAA6B;AAAA,MACrC,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO;AAAA,EACT;AACF;AAEA,SAAS,uBAAuB,QAAgB,SAAgC;AAC9E,QAAM,SAAS,eAAe,MAAM;AAAA;AAAA,gBAAqB,QAAQ,MAAM;AAAA;AAAA;AACvE,MAAI,SAAS;AACb,QAAM,WAAW;AACjB,MAAI,YAAY,WAAW,OAAO;AAElC,WAAS,IAAI,GAAG,IAAI,QAAQ,UAAU,YAAY,GAAG,KAAK;AACxD,UAAM,SAAS,QAAQ,CAAC;AACxB,UAAM,eAAe,QAAQ,IAAI,CAAC,KAAK,OAAO,KAAK;AAAA,WAAc,OAAO,GAAG;AAAA;AAAA;AAE3E,QAAI,YAAY,aAAa,SAAS,GAAI;AAE1C,cAAU;AACV,iBAAa,aAAa;AAE1B,UAAM,gBAAgB,KAAK,IAAI,YAAY,IAAI,OAAO,iBAAiB,QAAQ,IAAI;AACnF,QAAI,gBAAgB,GAAG;AACrB,UAAI,UAAU,OAAO,iBAAiB,MAAM,GAAG,aAAa;AAC5D,UAAI,QAAQ,SAAS,OAAO,iBAAiB,QAAQ;AACnD,kBAAU,QAAQ,MAAM,GAAG,KAAK,IAAI,gBAAgB,GAAG,CAAC,CAAC,IAAI;AAAA,MAC/D;AACA,gBAAU,UAAU;AACpB,mBAAa,QAAQ,SAAS;AAAA,IAChC;AAAA,EACF;AAEA,SAAO,OAAO,QAAQ;AACxB;","names":[]}
@@ -36,6 +36,7 @@ export declare function searchCacheFiltered(options: {
36
36
  query?: string;
37
37
  urlPattern?: string;
38
38
  since?: string;
39
+ limit?: number;
39
40
  }): CachedContent[];
40
41
  /**
41
42
  * BM25-ranked FTS5 search across cached pages. Returns normalized URLs
@@ -1 +1 @@
1
- {"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../src/cache/store.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,gBAAgB,EAAE,aAAa,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIjH;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAYlD;AAgBD,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA0BhD;AAMD,wBAAgB,YAAY,CAAC,MAAM,EAAE,cAAc,EAAE,UAAU,EAAE,gBAAgB,GAAG,IAAI,CA8CvF;AAsCD,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CASlE;AAED,wBAAgB,+BAA+B,CAAC,aAAa,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAM3F;AAED,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAM5E;AAED,wBAAgB,2BAA2B,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAMhF;AAED,wBAAgB,SAAS,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAGxD;AAED,MAAM,WAAW,kBAAkB;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,aAAa,CAC3B,MAAM,EAAE,aAAa,EACrB,IAAI,GAAE,kBAAuB,GAC5B;IAAE,MAAM,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,OAAO,CAAA;CAAE,CAQrC;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,aAAa,EAAE,CAY1D;AAED,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAC5B,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,gBAAgB,EAAE,EAC3B,WAAW,EAAE,MAAM,EAAE,GACpB,IAAI,CAqBN;AAED,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,kBAAuB,GAC5B,kBAAkB,GAAG,IAAI,CAkC3B;AAED,wBAAgB,mBAAmB,CAAC,OAAO,EAAE;IAC3C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,GAAG,aAAa,EAAE,CA4BlB;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAYnG;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,GAAG,MAAM,CA0BT;AAKD,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAyB/D;AAED,wBAAgB,aAAa,IAAI,UAAU,CAiB1C;AAID,wBAAgB,oBAAoB,CAClC,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,GACX,OAAO,CAoBT;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CA8BtF;AAED,MAAM,WAAW,eAAe;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,gBAAgB,CAAC,OAAO,CAAC,EAAE,MAAM,GAAG,eAAe,EAAE,CAqCpE"}
1
+ {"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../src/cache/store.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,gBAAgB,EAAE,aAAa,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIjH;;;;;;;GAOG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAYlD;AAgBD,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CA0BhD;AAMD,wBAAgB,YAAY,CAAC,MAAM,EAAE,cAAc,EAAE,UAAU,EAAE,gBAAgB,GAAG,IAAI,CA8CvF;AAsCD,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CASlE;AAED,wBAAgB,+BAA+B,CAAC,aAAa,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAM3F;AAED,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAM5E;AAED,wBAAgB,2BAA2B,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAMhF;AAED,wBAAgB,SAAS,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAGxD;AAED,MAAM,WAAW,kBAAkB;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,aAAa,CAC3B,MAAM,EAAE,aAAa,EACrB,IAAI,GAAE,kBAAuB,GAC5B;IAAE,MAAM,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,OAAO,CAAA;CAAE,CAQrC;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,aAAa,EAAE,CAY1D;AAED,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAC5B,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,gBAAgB,EAAE,EAC3B,WAAW,EAAE,MAAM,EAAE,GACpB,IAAI,CAqBN;AAED,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,kBAAuB,GAC5B,kBAAkB,GAAG,IAAI,CAkC3B;AAID,wBAAgB,mBAAmB,CAAC,OAAO,EAAE;IAC3C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,GAAG,aAAa,EAAE,CA6BlB;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAYnG;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,GAAG,MAAM,CA0BT;AAKD,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAyB/D;AAED,wBAAgB,aAAa,IAAI,UAAU,CAiB1C;AAID,wBAAgB,oBAAoB,CAClC,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,GACX,OAAO,CAoBT;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CA8BtF;AAED,MAAM,WAAW,eAAe;IAC9B,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,gBAAgB,CAAC,OAAO,CAAC,EAAE,MAAM,GAAG,eAAe,EAAE,CAqCpE"}
@@ -213,6 +213,7 @@ function getCachedSearchResults(query, opts = {}) {
213
213
  searched_at: row.searched_at
214
214
  };
215
215
  }
216
+ const DEFAULT_FILTERED_LIMIT = 100;
216
217
  function searchCacheFiltered(options) {
217
218
  const db = getDatabase();
218
219
  const conditions = [];
@@ -233,8 +234,9 @@ function searchCacheFiltered(options) {
233
234
  }
234
235
  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
235
236
  const orderClause = options.query ? "ORDER BY rank" : "ORDER BY url_cache.fetched_at DESC";
236
- const sql = `SELECT url_cache.* FROM ${fromClause} ${whereClause} ${orderClause} LIMIT 100`;
237
- const rows = db.prepare(sql).all(...params);
237
+ const limit = Math.max(1, Math.floor(options.limit ?? DEFAULT_FILTERED_LIMIT));
238
+ const sql = `SELECT url_cache.* FROM ${fromClause} ${whereClause} ${orderClause} LIMIT ?`;
239
+ const rows = db.prepare(sql).all(...params, limit);
238
240
  return rows.map(rowToCachedContent);
239
241
  }
240
242
  function ftsSearchRanked(query, limit) {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/cache/store.ts"],"sourcesContent":["import { createHash } from 'node:crypto';\nimport { getDatabase } from './db.js';\nimport { getConfig } from '../config.js';\nimport { createLogger } from '../logger.js';\nimport type { RawFetchResult, ExtractionResult, CachedContent, SearchResultItem, CacheStats } from '../types.js';\n\nconst log = createLogger('cache');\n\n/**\n * Sanitize a user query for sqlite FTS5 MATCH.\n *\n * Why: bare tokens with `.` / `-` / `/` / `:` / digits-with-dot\n * (e.g. \"5.4\", \"x-y\", \"https://foo\") raise `fts5: syntax error near \".\"`.\n * Quoting tokens that aren't pure word-chars lets FTS5 treat them as phrases.\n * Already-quoted phrases and explicit operators (AND/OR/NOT/parens) pass through.\n */\nexport function sanitizeFtsQuery(q: string): string {\n const trimmed = q.trim();\n if (!trimmed) return '';\n if (/^\".*\"$/.test(trimmed)) return trimmed;\n const tokens = trimmed.match(/\"[^\"]*\"|\\S+/g) ?? [];\n const RESERVED = new Set(['AND', 'OR', 'NOT', '(', ')']);\n return tokens.map(tok => {\n if (tok.startsWith('\"') && tok.endsWith('\"')) return tok;\n if (RESERVED.has(tok)) return tok;\n if (/^\\w+\\*?$/.test(tok)) return tok;\n return `\"${tok.replace(/\"/g, '\"\"')}\"`;\n }).join(' ');\n}\n\nconst TRACKING_PARAMS = new Set([\n 'utm_source',\n 'utm_medium',\n 'utm_campaign',\n 'utm_content',\n 'utm_term',\n 'utm_id',\n 'fbclid',\n 'gclid',\n 'msclkid',\n 'mc_cid',\n 'mc_eid',\n]);\n\nexport function normalizeUrl(url: string): string {\n const parsed = new URL(url);\n\n parsed.protocol = parsed.protocol.toLowerCase();\n parsed.hostname = parsed.hostname.toLowerCase().replace(/^www\\./, '');\n\n for (const key of [...parsed.searchParams.keys()]) {\n if (TRACKING_PARAMS.has(key) || key.startsWith('utm_')) {\n parsed.searchParams.delete(key);\n }\n }\n\n parsed.searchParams.sort();\n\n let result = parsed.toString();\n\n // Strip trailing slash from path (but not root)\n if (parsed.pathname !== '/' && result.endsWith('/')) {\n result = result.slice(0, -1);\n }\n // Remove trailing slash from origin-only URLs too\n if (parsed.pathname === '/' && !parsed.search && !parsed.hash) {\n result = result.replace(/\\/$/, '');\n }\n\n return result;\n}\n\nfunction toIsoSeconds(date: Date): string {\n return date.toISOString().replace('T', ' ').replace(/\\.\\d+Z$/, '');\n}\n\nexport function cacheContent(result: RawFetchResult, extraction: ExtractionResult): void {\n try {\n const db = getDatabase();\n const config = getConfig();\n\n const normalizedUrl = normalizeUrl(result.finalUrl || result.url);\n const contentHash = createHash('sha256').update(extraction.markdown).digest('hex');\n\n const now = new Date();\n const expiresAt = new Date(now.getTime() + config.cacheTtlContent * 1000);\n\n const stmt = db.prepare(`\n INSERT OR REPLACE INTO url_cache (\n url, normalized_url, title, markdown, raw_html,\n metadata, links, images, fetch_method, extractor_used,\n content_hash, fetched_at, expires_at\n )\n VALUES (\n @url, @normalizedUrl, @title, @markdown, @rawHtml,\n @metadata, @links, @images, @fetchMethod, @extractorUsed,\n @contentHash, @fetchedAt, @expiresAt\n )\n `);\n\n stmt.run({\n url: result.url,\n normalizedUrl,\n title: extraction.title,\n markdown: extraction.markdown,\n rawHtml: result.html,\n metadata: JSON.stringify(extraction.metadata),\n links: JSON.stringify(extraction.links),\n images: JSON.stringify(extraction.images),\n fetchMethod: result.method,\n extractorUsed: extraction.extractor,\n contentHash: contentHash,\n fetchedAt: toIsoSeconds(now),\n expiresAt: toIsoSeconds(expiresAt),\n });\n } catch (err) {\n log.warn('cacheContent failed', {\n url: result.url,\n finalUrl: result.finalUrl,\n error: err instanceof Error ? err.message : String(err),\n });\n }\n}\n\ninterface DbRow {\n id: number;\n url: string;\n normalized_url: string;\n title: string;\n markdown: string;\n raw_html: string;\n metadata: string;\n links: string;\n images: string;\n fetch_method: string;\n extractor_used: string;\n content_hash: string;\n fetched_at: string;\n expires_at: string | null;\n}\n\nfunction rowToCachedContent(row: DbRow): CachedContent {\n return {\n id: row.id,\n url: row.url,\n normalizedUrl: row.normalized_url,\n title: row.title,\n markdown: row.markdown,\n rawHtml: row.raw_html,\n metadata: row.metadata,\n links: row.links,\n images: row.images,\n fetchMethod: row.fetch_method as CachedContent['fetchMethod'],\n extractorUsed: row.extractor_used as CachedContent['extractorUsed'],\n contentHash: row.content_hash,\n fetchedAt: row.fetched_at,\n expiresAt: row.expires_at,\n };\n}\n\nexport function getCachedContent(url: string): CachedContent | null {\n const db = getDatabase();\n const normalizedUrl = normalizeUrl(url);\n\n const row = db.prepare(`\n SELECT * FROM url_cache WHERE url = ? OR normalized_url = ? LIMIT 1\n `).get(url, normalizedUrl) as DbRow | undefined;\n\n return row ? rowToCachedContent(row) : null;\n}\n\nexport function getCachedContentByNormalizedUrl(normalizedUrl: string): CachedContent | null {\n const db = getDatabase();\n const row = db.prepare(\n 'SELECT * FROM url_cache WHERE normalized_url = ? LIMIT 1',\n ).get(normalizedUrl) as DbRow | undefined;\n return row ? rowToCachedContent(row) : null;\n}\n\nexport function getHashForNormalizedUrl(normalizedUrl: string): string | null {\n const db = getDatabase();\n const row = db.prepare(\n 'SELECT content_hash FROM url_cache WHERE normalized_url = ? LIMIT 1',\n ).get(normalizedUrl) as { content_hash: string } | undefined;\n return row?.content_hash ?? null;\n}\n\nexport function getMarkdownForNormalizedUrl(normalizedUrl: string): string | null {\n const db = getDatabase();\n const row = db.prepare(\n 'SELECT markdown FROM url_cache WHERE normalized_url = ? LIMIT 1',\n ).get(normalizedUrl) as { markdown: string } | undefined;\n return row ? row.markdown : null;\n}\n\nexport function isExpired(cached: CachedContent): boolean {\n if (!cached.expiresAt) return false;\n return new Date(cached.expiresAt).getTime() < Date.now();\n}\n\nexport interface CacheLookupOptions {\n staleMaxSeconds?: number;\n}\n\nexport function isCacheUsable(\n cached: CachedContent,\n opts: CacheLookupOptions = {},\n): { usable: boolean; stale: boolean } {\n if (!cached.expiresAt) return { usable: true, stale: false };\n const expiresMs = new Date(cached.expiresAt).getTime();\n const now = Date.now();\n if (expiresMs >= now) return { usable: true, stale: false };\n const staleMaxMs = (opts.staleMaxSeconds ?? 0) * 1000;\n if (now - expiresMs <= staleMaxMs) return { usable: true, stale: true };\n return { usable: false, stale: false };\n}\n\nexport function searchCache(query: string): CachedContent[] {\n const db = getDatabase();\n\n const rows = db.prepare(`\n SELECT url_cache.*\n FROM url_cache\n JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid\n WHERE url_cache_fts MATCH ?\n ORDER BY rank\n `).all(sanitizeFtsQuery(query)) as DbRow[];\n\n return rows.map(rowToCachedContent);\n}\n\nexport interface CachedSearchResult {\n query: string;\n results: SearchResultItem[];\n engines_used: string[];\n searched_at: string;\n stale?: boolean;\n}\n\nexport function cacheSearchResults(\n query: string,\n results: SearchResultItem[],\n enginesUsed: string[],\n): void {\n const db = getDatabase();\n const config = getConfig();\n\n const queryHash = createHash('sha256').update(query.toLowerCase().trim()).digest('hex');\n const now = new Date();\n const expiresAt = new Date(now.getTime() + config.cacheTtlSearch * 1000);\n\n const stmt = db.prepare(`\n INSERT OR REPLACE INTO search_cache (query, query_hash, results, engines_used, searched_at, expires_at)\n VALUES (@query, @queryHash, @results, @enginesUsed, @searchedAt, @expiresAt)\n `);\n\n stmt.run({\n query,\n queryHash,\n results: JSON.stringify(results),\n enginesUsed: JSON.stringify(enginesUsed),\n searchedAt: toIsoSeconds(now),\n expiresAt: toIsoSeconds(expiresAt),\n });\n}\n\nexport function getCachedSearchResults(\n query: string,\n opts: CacheLookupOptions = {},\n): CachedSearchResult | null {\n const db = getDatabase();\n const queryHash = createHash('sha256').update(query.toLowerCase().trim()).digest('hex');\n\n const row = db.prepare(\n 'SELECT query, results, engines_used, searched_at, expires_at FROM search_cache WHERE query_hash = ? LIMIT 1',\n ).get(queryHash) as\n | { query: string; results: string; engines_used: string; searched_at: string; expires_at: string | null }\n | undefined;\n\n if (!row) return null;\n\n if (row.expires_at) {\n const expiresMs = new Date(row.expires_at).getTime();\n const now = Date.now();\n if (expiresMs < now) {\n const staleMaxMs = (opts.staleMaxSeconds ?? 0) * 1000;\n if (now - expiresMs > staleMaxMs) return null;\n return {\n query: row.query,\n results: JSON.parse(row.results) as SearchResultItem[],\n engines_used: JSON.parse(row.engines_used) as string[],\n searched_at: row.searched_at,\n stale: true,\n };\n }\n }\n\n return {\n query: row.query,\n results: JSON.parse(row.results) as SearchResultItem[],\n engines_used: JSON.parse(row.engines_used) as string[],\n searched_at: row.searched_at,\n };\n}\n\nexport function searchCacheFiltered(options: {\n query?: string;\n urlPattern?: string;\n since?: string;\n}): CachedContent[] {\n const db = getDatabase();\n const conditions: string[] = [];\n const params: unknown[] = [];\n let fromClause = 'url_cache';\n\n if (options.query) {\n fromClause = 'url_cache JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid';\n conditions.push('url_cache_fts MATCH ?');\n params.push(sanitizeFtsQuery(options.query));\n }\n\n if (options.urlPattern) {\n conditions.push('url_cache.normalized_url GLOB ?');\n params.push(options.urlPattern);\n }\n\n if (options.since) {\n conditions.push('url_cache.fetched_at > datetime(?)');\n params.push(options.since);\n }\n\n const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';\n const orderClause = options.query ? 'ORDER BY rank' : 'ORDER BY url_cache.fetched_at DESC';\n\n const sql = `SELECT url_cache.* FROM ${fromClause} ${whereClause} ${orderClause} LIMIT 100`;\n const rows = db.prepare(sql).all(...params) as DbRow[];\n return rows.map(rowToCachedContent);\n}\n\n/**\n * BM25-ranked FTS5 search across cached pages. Returns normalized URLs\n * paired with their rank score. `rank` from FTS5 is negative (lower is\n * better in sqlite ordering), so we flip the sign to surface a \"higher is\n * better\" score for consumers (e.g. RRF input).\n */\nexport function ftsSearchRanked(query: string, limit: number): Array<{ url: string; score: number }> {\n if (!query.trim() || limit <= 0) return [];\n const db = getDatabase();\n const rows = db.prepare(`\n SELECT url_cache.normalized_url AS url, url_cache_fts.rank AS rank\n FROM url_cache\n JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid\n WHERE url_cache_fts MATCH ?\n ORDER BY url_cache_fts.rank\n LIMIT ?\n `).all(sanitizeFtsQuery(query), limit) as Array<{ url: string; rank: number }>;\n return rows.map(r => ({ url: r.url, score: -r.rank }));\n}\n\nexport function clearCacheEntries(options: {\n query?: string;\n urlPattern?: string;\n since?: string;\n}): number {\n const db = getDatabase();\n const conditions: string[] = [];\n const params: unknown[] = [];\n\n if (options.query) {\n conditions.push(\n 'id IN (SELECT url_cache.id FROM url_cache JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid WHERE url_cache_fts MATCH ?)',\n );\n params.push(sanitizeFtsQuery(options.query));\n }\n\n if (options.urlPattern) {\n conditions.push('normalized_url GLOB ?');\n params.push(options.urlPattern);\n }\n\n if (options.since) {\n conditions.push('fetched_at > datetime(?)');\n params.push(options.since);\n }\n\n const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';\n const sql = `DELETE FROM url_cache ${whereClause}`;\n const result = db.prepare(sql).run(...params);\n return result.changes;\n}\n\n// Counts cached URLs for an exact host (apex scoping — `blog.example.com`\n// and `example.com` are NOT collapsed). Leading `www.` is stripped to align\n// with normalizeUrl.\nexport function countCachedUrlsForDomain(domain: string): number {\n const db = getDatabase();\n const normalized = domain.toLowerCase().replace(/^www\\./, '');\n const stmt = db.prepare(`\n SELECT COUNT(*) AS n FROM url_cache\n WHERE url LIKE 'http://' || ? || '/%'\n OR url LIKE 'https://' || ? || '/%'\n OR url LIKE 'http://www.' || ? || '/%'\n OR url LIKE 'https://www.' || ? || '/%'\n OR url = 'http://' || ?\n OR url = 'https://' || ?\n OR url = 'http://www.' || ?\n OR url = 'https://www.' || ?\n `);\n const row = stmt.get(\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n ) as { n: number };\n return row.n;\n}\n\nexport function getCacheStats(): CacheStats {\n const db = getDatabase();\n const row = db.prepare(`\n SELECT\n COUNT(*) as total_urls,\n COALESCE(SUM(LENGTH(markdown) + LENGTH(COALESCE(raw_html, ''))), 0) as total_bytes,\n MIN(fetched_at) as oldest,\n MAX(fetched_at) as newest\n FROM url_cache\n `).get() as { total_urls: number; total_bytes: number; oldest: string | null; newest: string | null };\n\n return {\n total_urls: row.total_urls,\n total_size_mb: Math.round((row.total_bytes / (1024 * 1024)) * 1e6) / 1e6,\n oldest: row.oldest ?? '',\n newest: row.newest ?? '',\n };\n}\n\n// --- Embedding store functions (Slice 22) ---\n\nexport function updateCacheEmbedding(\n url: string,\n embedding: Buffer,\n model: string,\n dims: number,\n): boolean {\n try {\n const db = getDatabase();\n let normalized: string;\n try {\n normalized = normalizeUrl(url);\n } catch {\n normalized = url;\n }\n\n const result = db.prepare(`\n UPDATE url_cache\n SET embedding = ?, embedding_model = ?, embedding_dims = ?, updated_at = datetime('now')\n WHERE normalized_url = ?\n `).run(embedding, model, dims, normalized);\n\n return result.changes > 0;\n } catch {\n return false;\n }\n}\n\nexport interface EmbeddingData {\n embedding: Buffer;\n model: string;\n dims: number;\n}\n\nexport function getEmbeddingForUrl(url: string, modelId?: string): EmbeddingData | null {\n try {\n const db = getDatabase();\n let normalized: string;\n try {\n normalized = normalizeUrl(url);\n } catch {\n normalized = url;\n }\n\n const row = db.prepare(`\n SELECT embedding, embedding_model, embedding_dims\n FROM url_cache\n WHERE (url = ? OR normalized_url = ?) AND embedding IS NOT NULL\n LIMIT 1\n `).get(url, normalized) as { embedding: Buffer; embedding_model: string; embedding_dims: number } | undefined;\n\n if (!row) return null;\n // Filter by modelId when caller wants only embeddings from the current\n // model; mismatched entries return null so they are treated as cache miss.\n if (modelId !== undefined && row.embedding_model !== modelId) return null;\n\n return {\n embedding: row.embedding,\n model: row.embedding_model,\n dims: row.embedding_dims,\n };\n } catch {\n return null;\n }\n}\n\nexport interface StoredEmbedding {\n normalizedUrl: string;\n embedding: Buffer;\n model: string;\n dims: number;\n}\n\nexport function getAllEmbeddings(modelId?: string): StoredEmbedding[] {\n try {\n const db = getDatabase();\n // Filter by modelId when provided so stale entries from a previous model\n // (different dim / vector space) are skipped — the in-memory vector index\n // requires matching dimensionality across all entries.\n const rows = modelId !== undefined\n ? db.prepare(`\n SELECT normalized_url, embedding, embedding_model, embedding_dims\n FROM url_cache\n WHERE embedding IS NOT NULL AND embedding_model = ?\n `).all(modelId) as Array<{\n normalized_url: string;\n embedding: Buffer;\n embedding_model: string;\n embedding_dims: number;\n }>\n : db.prepare(`\n SELECT normalized_url, embedding, embedding_model, embedding_dims\n FROM url_cache\n WHERE embedding IS NOT NULL\n `).all() as Array<{\n normalized_url: string;\n embedding: Buffer;\n embedding_model: string;\n embedding_dims: number;\n }>;\n\n return rows.map(r => ({\n normalizedUrl: r.normalized_url,\n embedding: r.embedding,\n model: r.embedding_model,\n dims: r.embedding_dims,\n }));\n } catch {\n return [];\n }\n}\n"],"mappings":"AAAA,SAAS,kBAAkB;AAC3B,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAG7B,MAAM,MAAM,aAAa,OAAO;AAUzB,SAAS,iBAAiB,GAAmB;AAClD,QAAM,UAAU,EAAE,KAAK;AACvB,MAAI,CAAC,QAAS,QAAO;AACrB,MAAI,SAAS,KAAK,OAAO,EAAG,QAAO;AACnC,QAAM,SAAS,QAAQ,MAAM,cAAc,KAAK,CAAC;AACjD,QAAM,WAAW,oBAAI,IAAI,CAAC,OAAO,MAAM,OAAO,KAAK,GAAG,CAAC;AACvD,SAAO,OAAO,IAAI,SAAO;AACvB,QAAI,IAAI,WAAW,GAAG,KAAK,IAAI,SAAS,GAAG,EAAG,QAAO;AACrD,QAAI,SAAS,IAAI,GAAG,EAAG,QAAO;AAC9B,QAAI,WAAW,KAAK,GAAG,EAAG,QAAO;AACjC,WAAO,IAAI,IAAI,QAAQ,MAAM,IAAI,CAAC;AAAA,EACpC,CAAC,EAAE,KAAK,GAAG;AACb;AAEA,MAAM,kBAAkB,oBAAI,IAAI;AAAA,EAC9B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,SAAS,aAAa,KAAqB;AAChD,QAAM,SAAS,IAAI,IAAI,GAAG;AAE1B,SAAO,WAAW,OAAO,SAAS,YAAY;AAC9C,SAAO,WAAW,OAAO,SAAS,YAAY,EAAE,QAAQ,UAAU,EAAE;AAEpE,aAAW,OAAO,CAAC,GAAG,OAAO,aAAa,KAAK,CAAC,GAAG;AACjD,QAAI,gBAAgB,IAAI,GAAG,KAAK,IAAI,WAAW,MAAM,GAAG;AACtD,aAAO,aAAa,OAAO,GAAG;AAAA,IAChC;AAAA,EACF;AAEA,SAAO,aAAa,KAAK;AAEzB,MAAI,SAAS,OAAO,SAAS;AAG7B,MAAI,OAAO,aAAa,OAAO,OAAO,SAAS,GAAG,GAAG;AACnD,aAAS,OAAO,MAAM,GAAG,EAAE;AAAA,EAC7B;AAEA,MAAI,OAAO,aAAa,OAAO,CAAC,OAAO,UAAU,CAAC,OAAO,MAAM;AAC7D,aAAS,OAAO,QAAQ,OAAO,EAAE;AAAA,EACnC;AAEA,SAAO;AACT;AAEA,SAAS,aAAa,MAAoB;AACxC,SAAO,KAAK,YAAY,EAAE,QAAQ,KAAK,GAAG,EAAE,QAAQ,WAAW,EAAE;AACnE;AAEO,SAAS,aAAa,QAAwB,YAAoC;AACvF,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,UAAM,SAAS,UAAU;AAEzB,UAAM,gBAAgB,aAAa,OAAO,YAAY,OAAO,GAAG;AAChE,UAAM,cAAc,WAAW,QAAQ,EAAE,OAAO,WAAW,QAAQ,EAAE,OAAO,KAAK;AAEjF,UAAM,MAAM,oBAAI,KAAK;AACrB,UAAM,YAAY,IAAI,KAAK,IAAI,QAAQ,IAAI,OAAO,kBAAkB,GAAI;AAExE,UAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,KAWvB;AAED,SAAK,IAAI;AAAA,MACP,KAAK,OAAO;AAAA,MACZ;AAAA,MACA,OAAO,WAAW;AAAA,MAClB,UAAU,WAAW;AAAA,MACrB,SAAS,OAAO;AAAA,MAChB,UAAU,KAAK,UAAU,WAAW,QAAQ;AAAA,MAC5C,OAAO,KAAK,UAAU,WAAW,KAAK;AAAA,MACtC,QAAQ,KAAK,UAAU,WAAW,MAAM;AAAA,MACxC,aAAa,OAAO;AAAA,MACpB,eAAe,WAAW;AAAA,MAC1B;AAAA,MACA,WAAW,aAAa,GAAG;AAAA,MAC3B,WAAW,aAAa,SAAS;AAAA,IACnC,CAAC;AAAA,EACH,SAAS,KAAK;AACZ,QAAI,KAAK,uBAAuB;AAAA,MAC9B,KAAK,OAAO;AAAA,MACZ,UAAU,OAAO;AAAA,MACjB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AAAA,EACH;AACF;AAmBA,SAAS,mBAAmB,KAA2B;AACrD,SAAO;AAAA,IACL,IAAI,IAAI;AAAA,IACR,KAAK,IAAI;AAAA,IACT,eAAe,IAAI;AAAA,IACnB,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,SAAS,IAAI;AAAA,IACb,UAAU,IAAI;AAAA,IACd,OAAO,IAAI;AAAA,IACX,QAAQ,IAAI;AAAA,IACZ,aAAa,IAAI;AAAA,IACjB,eAAe,IAAI;AAAA,IACnB,aAAa,IAAI;AAAA,IACjB,WAAW,IAAI;AAAA,IACf,WAAW,IAAI;AAAA,EACjB;AACF;AAEO,SAAS,iBAAiB,KAAmC;AAClE,QAAM,KAAK,YAAY;AACvB,QAAM,gBAAgB,aAAa,GAAG;AAEtC,QAAM,MAAM,GAAG,QAAQ;AAAA;AAAA,GAEtB,EAAE,IAAI,KAAK,aAAa;AAEzB,SAAO,MAAM,mBAAmB,GAAG,IAAI;AACzC;AAEO,SAAS,gCAAgC,eAA6C;AAC3F,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,aAAa;AACnB,SAAO,MAAM,mBAAmB,GAAG,IAAI;AACzC;AAEO,SAAS,wBAAwB,eAAsC;AAC5E,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,aAAa;AACnB,SAAO,KAAK,gBAAgB;AAC9B;AAEO,SAAS,4BAA4B,eAAsC;AAChF,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,aAAa;AACnB,SAAO,MAAM,IAAI,WAAW;AAC9B;AAEO,SAAS,UAAU,QAAgC;AACxD,MAAI,CAAC,OAAO,UAAW,QAAO;AAC9B,SAAO,IAAI,KAAK,OAAO,SAAS,EAAE,QAAQ,IAAI,KAAK,IAAI;AACzD;AAMO,SAAS,cACd,QACA,OAA2B,CAAC,GACS;AACrC,MAAI,CAAC,OAAO,UAAW,QAAO,EAAE,QAAQ,MAAM,OAAO,MAAM;AAC3D,QAAM,YAAY,IAAI,KAAK,OAAO,SAAS,EAAE,QAAQ;AACrD,QAAM,MAAM,KAAK,IAAI;AACrB,MAAI,aAAa,IAAK,QAAO,EAAE,QAAQ,MAAM,OAAO,MAAM;AAC1D,QAAM,cAAc,KAAK,mBAAmB,KAAK;AACjD,MAAI,MAAM,aAAa,WAAY,QAAO,EAAE,QAAQ,MAAM,OAAO,KAAK;AACtE,SAAO,EAAE,QAAQ,OAAO,OAAO,MAAM;AACvC;AAEO,SAAS,YAAY,OAAgC;AAC1D,QAAM,KAAK,YAAY;AAEvB,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAMvB,EAAE,IAAI,iBAAiB,KAAK,CAAC;AAE9B,SAAO,KAAK,IAAI,kBAAkB;AACpC;AAUO,SAAS,mBACd,OACA,SACA,aACM;AACN,QAAM,KAAK,YAAY;AACvB,QAAM,SAAS,UAAU;AAEzB,QAAM,YAAY,WAAW,QAAQ,EAAE,OAAO,MAAM,YAAY,EAAE,KAAK,CAAC,EAAE,OAAO,KAAK;AACtF,QAAM,MAAM,oBAAI,KAAK;AACrB,QAAM,YAAY,IAAI,KAAK,IAAI,QAAQ,IAAI,OAAO,iBAAiB,GAAI;AAEvE,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA,GAGvB;AAED,OAAK,IAAI;AAAA,IACP;AAAA,IACA;AAAA,IACA,SAAS,KAAK,UAAU,OAAO;AAAA,IAC/B,aAAa,KAAK,UAAU,WAAW;AAAA,IACvC,YAAY,aAAa,GAAG;AAAA,IAC5B,WAAW,aAAa,SAAS;AAAA,EACnC,CAAC;AACH;AAEO,SAAS,uBACd,OACA,OAA2B,CAAC,GACD;AAC3B,QAAM,KAAK,YAAY;AACvB,QAAM,YAAY,WAAW,QAAQ,EAAE,OAAO,MAAM,YAAY,EAAE,KAAK,CAAC,EAAE,OAAO,KAAK;AAEtF,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,SAAS;AAIf,MAAI,CAAC,IAAK,QAAO;AAEjB,MAAI,IAAI,YAAY;AAClB,UAAM,YAAY,IAAI,KAAK,IAAI,UAAU,EAAE,QAAQ;AACnD,UAAM,MAAM,KAAK,IAAI;AACrB,QAAI,YAAY,KAAK;AACnB,YAAM,cAAc,KAAK,mBAAmB,KAAK;AACjD,UAAI,MAAM,YAAY,WAAY,QAAO;AACzC,aAAO;AAAA,QACL,OAAO,IAAI;AAAA,QACX,SAAS,KAAK,MAAM,IAAI,OAAO;AAAA,QAC/B,cAAc,KAAK,MAAM,IAAI,YAAY;AAAA,QACzC,aAAa,IAAI;AAAA,QACjB,OAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO,IAAI;AAAA,IACX,SAAS,KAAK,MAAM,IAAI,OAAO;AAAA,IAC/B,cAAc,KAAK,MAAM,IAAI,YAAY;AAAA,IACzC,aAAa,IAAI;AAAA,EACnB;AACF;AAEO,SAAS,oBAAoB,SAIhB;AAClB,QAAM,KAAK,YAAY;AACvB,QAAM,aAAuB,CAAC;AAC9B,QAAM,SAAoB,CAAC;AAC3B,MAAI,aAAa;AAEjB,MAAI,QAAQ,OAAO;AACjB,iBAAa;AACb,eAAW,KAAK,uBAAuB;AACvC,WAAO,KAAK,iBAAiB,QAAQ,KAAK,CAAC;AAAA,EAC7C;AAEA,MAAI,QAAQ,YAAY;AACtB,eAAW,KAAK,iCAAiC;AACjD,WAAO,KAAK,QAAQ,UAAU;AAAA,EAChC;AAEA,MAAI,QAAQ,OAAO;AACjB,eAAW,KAAK,oCAAoC;AACpD,WAAO,KAAK,QAAQ,KAAK;AAAA,EAC3B;AAEA,QAAM,cAAc,WAAW,SAAS,IAAI,SAAS,WAAW,KAAK,OAAO,CAAC,KAAK;AAClF,QAAM,cAAc,QAAQ,QAAQ,kBAAkB;AAEtD,QAAM,MAAM,2BAA2B,UAAU,IAAI,WAAW,IAAI,WAAW;AAC/E,QAAM,OAAO,GAAG,QAAQ,GAAG,EAAE,IAAI,GAAG,MAAM;AAC1C,SAAO,KAAK,IAAI,kBAAkB;AACpC;AAQO,SAAS,gBAAgB,OAAe,OAAsD;AACnG,MAAI,CAAC,MAAM,KAAK,KAAK,SAAS,EAAG,QAAO,CAAC;AACzC,QAAM,KAAK,YAAY;AACvB,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAOvB,EAAE,IAAI,iBAAiB,KAAK,GAAG,KAAK;AACrC,SAAO,KAAK,IAAI,QAAM,EAAE,KAAK,EAAE,KAAK,OAAO,CAAC,EAAE,KAAK,EAAE;AACvD;AAEO,SAAS,kBAAkB,SAIvB;AACT,QAAM,KAAK,YAAY;AACvB,QAAM,aAAuB,CAAC;AAC9B,QAAM,SAAoB,CAAC;AAE3B,MAAI,QAAQ,OAAO;AACjB,eAAW;AAAA,MACT;AAAA,IACF;AACA,WAAO,KAAK,iBAAiB,QAAQ,KAAK,CAAC;AAAA,EAC7C;AAEA,MAAI,QAAQ,YAAY;AACtB,eAAW,KAAK,uBAAuB;AACvC,WAAO,KAAK,QAAQ,UAAU;AAAA,EAChC;AAEA,MAAI,QAAQ,OAAO;AACjB,eAAW,KAAK,0BAA0B;AAC1C,WAAO,KAAK,QAAQ,KAAK;AAAA,EAC3B;AAEA,QAAM,cAAc,WAAW,SAAS,IAAI,SAAS,WAAW,KAAK,OAAO,CAAC,KAAK;AAClF,QAAM,MAAM,yBAAyB,WAAW;AAChD,QAAM,SAAS,GAAG,QAAQ,GAAG,EAAE,IAAI,GAAG,MAAM;AAC5C,SAAO,OAAO;AAChB;AAKO,SAAS,yBAAyB,QAAwB;AAC/D,QAAM,KAAK,YAAY;AACvB,QAAM,aAAa,OAAO,YAAY,EAAE,QAAQ,UAAU,EAAE;AAC5D,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAUvB;AACD,QAAM,MAAM,KAAK;AAAA,IACf;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,SAAO,IAAI;AACb;AAEO,SAAS,gBAA4B;AAC1C,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAOtB,EAAE,IAAI;AAEP,SAAO;AAAA,IACL,YAAY,IAAI;AAAA,IAChB,eAAe,KAAK,MAAO,IAAI,eAAe,OAAO,QAAS,GAAG,IAAI;AAAA,IACrE,QAAQ,IAAI,UAAU;AAAA,IACtB,QAAQ,IAAI,UAAU;AAAA,EACxB;AACF;AAIO,SAAS,qBACd,KACA,WACA,OACA,MACS;AACT,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,QAAI;AACJ,QAAI;AACF,mBAAa,aAAa,GAAG;AAAA,IAC/B,QAAQ;AACN,mBAAa;AAAA,IACf;AAEA,UAAM,SAAS,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA,KAIzB,EAAE,IAAI,WAAW,OAAO,MAAM,UAAU;AAEzC,WAAO,OAAO,UAAU;AAAA,EAC1B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAQO,SAAS,mBAAmB,KAAa,SAAwC;AACtF,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,QAAI;AACJ,QAAI;AACF,mBAAa,aAAa,GAAG;AAAA,IAC/B,QAAQ;AACN,mBAAa;AAAA,IACf;AAEA,UAAM,MAAM,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA,KAKtB,EAAE,IAAI,KAAK,UAAU;AAEtB,QAAI,CAAC,IAAK,QAAO;AAGjB,QAAI,YAAY,UAAa,IAAI,oBAAoB,QAAS,QAAO;AAErE,WAAO;AAAA,MACL,WAAW,IAAI;AAAA,MACf,OAAO,IAAI;AAAA,MACX,MAAM,IAAI;AAAA,IACZ;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AASO,SAAS,iBAAiB,SAAqC;AACpE,MAAI;AACF,UAAM,KAAK,YAAY;AAIvB,UAAM,OAAO,YAAY,SACrB,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA,SAIV,EAAE,IAAI,OAAO,IAMd,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA,SAIV,EAAE,IAAI;AAOX,WAAO,KAAK,IAAI,QAAM;AAAA,MACpB,eAAe,EAAE;AAAA,MACjB,WAAW,EAAE;AAAA,MACb,OAAO,EAAE;AAAA,MACT,MAAM,EAAE;AAAA,IACV,EAAE;AAAA,EACJ,QAAQ;AACN,WAAO,CAAC;AAAA,EACV;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/cache/store.ts"],"sourcesContent":["import { createHash } from 'node:crypto';\nimport { getDatabase } from './db.js';\nimport { getConfig } from '../config.js';\nimport { createLogger } from '../logger.js';\nimport type { RawFetchResult, ExtractionResult, CachedContent, SearchResultItem, CacheStats } from '../types.js';\n\nconst log = createLogger('cache');\n\n/**\n * Sanitize a user query for sqlite FTS5 MATCH.\n *\n * Why: bare tokens with `.` / `-` / `/` / `:` / digits-with-dot\n * (e.g. \"5.4\", \"x-y\", \"https://foo\") raise `fts5: syntax error near \".\"`.\n * Quoting tokens that aren't pure word-chars lets FTS5 treat them as phrases.\n * Already-quoted phrases and explicit operators (AND/OR/NOT/parens) pass through.\n */\nexport function sanitizeFtsQuery(q: string): string {\n const trimmed = q.trim();\n if (!trimmed) return '';\n if (/^\".*\"$/.test(trimmed)) return trimmed;\n const tokens = trimmed.match(/\"[^\"]*\"|\\S+/g) ?? [];\n const RESERVED = new Set(['AND', 'OR', 'NOT', '(', ')']);\n return tokens.map(tok => {\n if (tok.startsWith('\"') && tok.endsWith('\"')) return tok;\n if (RESERVED.has(tok)) return tok;\n if (/^\\w+\\*?$/.test(tok)) return tok;\n return `\"${tok.replace(/\"/g, '\"\"')}\"`;\n }).join(' ');\n}\n\nconst TRACKING_PARAMS = new Set([\n 'utm_source',\n 'utm_medium',\n 'utm_campaign',\n 'utm_content',\n 'utm_term',\n 'utm_id',\n 'fbclid',\n 'gclid',\n 'msclkid',\n 'mc_cid',\n 'mc_eid',\n]);\n\nexport function normalizeUrl(url: string): string {\n const parsed = new URL(url);\n\n parsed.protocol = parsed.protocol.toLowerCase();\n parsed.hostname = parsed.hostname.toLowerCase().replace(/^www\\./, '');\n\n for (const key of [...parsed.searchParams.keys()]) {\n if (TRACKING_PARAMS.has(key) || key.startsWith('utm_')) {\n parsed.searchParams.delete(key);\n }\n }\n\n parsed.searchParams.sort();\n\n let result = parsed.toString();\n\n // Strip trailing slash from path (but not root)\n if (parsed.pathname !== '/' && result.endsWith('/')) {\n result = result.slice(0, -1);\n }\n // Remove trailing slash from origin-only URLs too\n if (parsed.pathname === '/' && !parsed.search && !parsed.hash) {\n result = result.replace(/\\/$/, '');\n }\n\n return result;\n}\n\nfunction toIsoSeconds(date: Date): string {\n return date.toISOString().replace('T', ' ').replace(/\\.\\d+Z$/, '');\n}\n\nexport function cacheContent(result: RawFetchResult, extraction: ExtractionResult): void {\n try {\n const db = getDatabase();\n const config = getConfig();\n\n const normalizedUrl = normalizeUrl(result.finalUrl || result.url);\n const contentHash = createHash('sha256').update(extraction.markdown).digest('hex');\n\n const now = new Date();\n const expiresAt = new Date(now.getTime() + config.cacheTtlContent * 1000);\n\n const stmt = db.prepare(`\n INSERT OR REPLACE INTO url_cache (\n url, normalized_url, title, markdown, raw_html,\n metadata, links, images, fetch_method, extractor_used,\n content_hash, fetched_at, expires_at\n )\n VALUES (\n @url, @normalizedUrl, @title, @markdown, @rawHtml,\n @metadata, @links, @images, @fetchMethod, @extractorUsed,\n @contentHash, @fetchedAt, @expiresAt\n )\n `);\n\n stmt.run({\n url: result.url,\n normalizedUrl,\n title: extraction.title,\n markdown: extraction.markdown,\n rawHtml: result.html,\n metadata: JSON.stringify(extraction.metadata),\n links: JSON.stringify(extraction.links),\n images: JSON.stringify(extraction.images),\n fetchMethod: result.method,\n extractorUsed: extraction.extractor,\n contentHash: contentHash,\n fetchedAt: toIsoSeconds(now),\n expiresAt: toIsoSeconds(expiresAt),\n });\n } catch (err) {\n log.warn('cacheContent failed', {\n url: result.url,\n finalUrl: result.finalUrl,\n error: err instanceof Error ? err.message : String(err),\n });\n }\n}\n\ninterface DbRow {\n id: number;\n url: string;\n normalized_url: string;\n title: string;\n markdown: string;\n raw_html: string;\n metadata: string;\n links: string;\n images: string;\n fetch_method: string;\n extractor_used: string;\n content_hash: string;\n fetched_at: string;\n expires_at: string | null;\n}\n\nfunction rowToCachedContent(row: DbRow): CachedContent {\n return {\n id: row.id,\n url: row.url,\n normalizedUrl: row.normalized_url,\n title: row.title,\n markdown: row.markdown,\n rawHtml: row.raw_html,\n metadata: row.metadata,\n links: row.links,\n images: row.images,\n fetchMethod: row.fetch_method as CachedContent['fetchMethod'],\n extractorUsed: row.extractor_used as CachedContent['extractorUsed'],\n contentHash: row.content_hash,\n fetchedAt: row.fetched_at,\n expiresAt: row.expires_at,\n };\n}\n\nexport function getCachedContent(url: string): CachedContent | null {\n const db = getDatabase();\n const normalizedUrl = normalizeUrl(url);\n\n const row = db.prepare(`\n SELECT * FROM url_cache WHERE url = ? OR normalized_url = ? LIMIT 1\n `).get(url, normalizedUrl) as DbRow | undefined;\n\n return row ? rowToCachedContent(row) : null;\n}\n\nexport function getCachedContentByNormalizedUrl(normalizedUrl: string): CachedContent | null {\n const db = getDatabase();\n const row = db.prepare(\n 'SELECT * FROM url_cache WHERE normalized_url = ? LIMIT 1',\n ).get(normalizedUrl) as DbRow | undefined;\n return row ? rowToCachedContent(row) : null;\n}\n\nexport function getHashForNormalizedUrl(normalizedUrl: string): string | null {\n const db = getDatabase();\n const row = db.prepare(\n 'SELECT content_hash FROM url_cache WHERE normalized_url = ? LIMIT 1',\n ).get(normalizedUrl) as { content_hash: string } | undefined;\n return row?.content_hash ?? null;\n}\n\nexport function getMarkdownForNormalizedUrl(normalizedUrl: string): string | null {\n const db = getDatabase();\n const row = db.prepare(\n 'SELECT markdown FROM url_cache WHERE normalized_url = ? LIMIT 1',\n ).get(normalizedUrl) as { markdown: string } | undefined;\n return row ? row.markdown : null;\n}\n\nexport function isExpired(cached: CachedContent): boolean {\n if (!cached.expiresAt) return false;\n return new Date(cached.expiresAt).getTime() < Date.now();\n}\n\nexport interface CacheLookupOptions {\n staleMaxSeconds?: number;\n}\n\nexport function isCacheUsable(\n cached: CachedContent,\n opts: CacheLookupOptions = {},\n): { usable: boolean; stale: boolean } {\n if (!cached.expiresAt) return { usable: true, stale: false };\n const expiresMs = new Date(cached.expiresAt).getTime();\n const now = Date.now();\n if (expiresMs >= now) return { usable: true, stale: false };\n const staleMaxMs = (opts.staleMaxSeconds ?? 0) * 1000;\n if (now - expiresMs <= staleMaxMs) return { usable: true, stale: true };\n return { usable: false, stale: false };\n}\n\nexport function searchCache(query: string): CachedContent[] {\n const db = getDatabase();\n\n const rows = db.prepare(`\n SELECT url_cache.*\n FROM url_cache\n JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid\n WHERE url_cache_fts MATCH ?\n ORDER BY rank\n `).all(sanitizeFtsQuery(query)) as DbRow[];\n\n return rows.map(rowToCachedContent);\n}\n\nexport interface CachedSearchResult {\n query: string;\n results: SearchResultItem[];\n engines_used: string[];\n searched_at: string;\n stale?: boolean;\n}\n\nexport function cacheSearchResults(\n query: string,\n results: SearchResultItem[],\n enginesUsed: string[],\n): void {\n const db = getDatabase();\n const config = getConfig();\n\n const queryHash = createHash('sha256').update(query.toLowerCase().trim()).digest('hex');\n const now = new Date();\n const expiresAt = new Date(now.getTime() + config.cacheTtlSearch * 1000);\n\n const stmt = db.prepare(`\n INSERT OR REPLACE INTO search_cache (query, query_hash, results, engines_used, searched_at, expires_at)\n VALUES (@query, @queryHash, @results, @enginesUsed, @searchedAt, @expiresAt)\n `);\n\n stmt.run({\n query,\n queryHash,\n results: JSON.stringify(results),\n enginesUsed: JSON.stringify(enginesUsed),\n searchedAt: toIsoSeconds(now),\n expiresAt: toIsoSeconds(expiresAt),\n });\n}\n\nexport function getCachedSearchResults(\n query: string,\n opts: CacheLookupOptions = {},\n): CachedSearchResult | null {\n const db = getDatabase();\n const queryHash = createHash('sha256').update(query.toLowerCase().trim()).digest('hex');\n\n const row = db.prepare(\n 'SELECT query, results, engines_used, searched_at, expires_at FROM search_cache WHERE query_hash = ? LIMIT 1',\n ).get(queryHash) as\n | { query: string; results: string; engines_used: string; searched_at: string; expires_at: string | null }\n | undefined;\n\n if (!row) return null;\n\n if (row.expires_at) {\n const expiresMs = new Date(row.expires_at).getTime();\n const now = Date.now();\n if (expiresMs < now) {\n const staleMaxMs = (opts.staleMaxSeconds ?? 0) * 1000;\n if (now - expiresMs > staleMaxMs) return null;\n return {\n query: row.query,\n results: JSON.parse(row.results) as SearchResultItem[],\n engines_used: JSON.parse(row.engines_used) as string[],\n searched_at: row.searched_at,\n stale: true,\n };\n }\n }\n\n return {\n query: row.query,\n results: JSON.parse(row.results) as SearchResultItem[],\n engines_used: JSON.parse(row.engines_used) as string[],\n searched_at: row.searched_at,\n };\n}\n\nconst DEFAULT_FILTERED_LIMIT = 100;\n\nexport function searchCacheFiltered(options: {\n query?: string;\n urlPattern?: string;\n since?: string;\n limit?: number;\n}): CachedContent[] {\n const db = getDatabase();\n const conditions: string[] = [];\n const params: unknown[] = [];\n let fromClause = 'url_cache';\n\n if (options.query) {\n fromClause = 'url_cache JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid';\n conditions.push('url_cache_fts MATCH ?');\n params.push(sanitizeFtsQuery(options.query));\n }\n\n if (options.urlPattern) {\n conditions.push('url_cache.normalized_url GLOB ?');\n params.push(options.urlPattern);\n }\n\n if (options.since) {\n conditions.push('url_cache.fetched_at > datetime(?)');\n params.push(options.since);\n }\n\n const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';\n const orderClause = options.query ? 'ORDER BY rank' : 'ORDER BY url_cache.fetched_at DESC';\n const limit = Math.max(1, Math.floor(options.limit ?? DEFAULT_FILTERED_LIMIT));\n\n const sql = `SELECT url_cache.* FROM ${fromClause} ${whereClause} ${orderClause} LIMIT ?`;\n const rows = db.prepare(sql).all(...params, limit) as DbRow[];\n return rows.map(rowToCachedContent);\n}\n\n/**\n * BM25-ranked FTS5 search across cached pages. Returns normalized URLs\n * paired with their rank score. `rank` from FTS5 is negative (lower is\n * better in sqlite ordering), so we flip the sign to surface a \"higher is\n * better\" score for consumers (e.g. RRF input).\n */\nexport function ftsSearchRanked(query: string, limit: number): Array<{ url: string; score: number }> {\n if (!query.trim() || limit <= 0) return [];\n const db = getDatabase();\n const rows = db.prepare(`\n SELECT url_cache.normalized_url AS url, url_cache_fts.rank AS rank\n FROM url_cache\n JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid\n WHERE url_cache_fts MATCH ?\n ORDER BY url_cache_fts.rank\n LIMIT ?\n `).all(sanitizeFtsQuery(query), limit) as Array<{ url: string; rank: number }>;\n return rows.map(r => ({ url: r.url, score: -r.rank }));\n}\n\nexport function clearCacheEntries(options: {\n query?: string;\n urlPattern?: string;\n since?: string;\n}): number {\n const db = getDatabase();\n const conditions: string[] = [];\n const params: unknown[] = [];\n\n if (options.query) {\n conditions.push(\n 'id IN (SELECT url_cache.id FROM url_cache JOIN url_cache_fts ON url_cache.id = url_cache_fts.rowid WHERE url_cache_fts MATCH ?)',\n );\n params.push(sanitizeFtsQuery(options.query));\n }\n\n if (options.urlPattern) {\n conditions.push('normalized_url GLOB ?');\n params.push(options.urlPattern);\n }\n\n if (options.since) {\n conditions.push('fetched_at > datetime(?)');\n params.push(options.since);\n }\n\n const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';\n const sql = `DELETE FROM url_cache ${whereClause}`;\n const result = db.prepare(sql).run(...params);\n return result.changes;\n}\n\n// Counts cached URLs for an exact host (apex scoping — `blog.example.com`\n// and `example.com` are NOT collapsed). Leading `www.` is stripped to align\n// with normalizeUrl.\nexport function countCachedUrlsForDomain(domain: string): number {\n const db = getDatabase();\n const normalized = domain.toLowerCase().replace(/^www\\./, '');\n const stmt = db.prepare(`\n SELECT COUNT(*) AS n FROM url_cache\n WHERE url LIKE 'http://' || ? || '/%'\n OR url LIKE 'https://' || ? || '/%'\n OR url LIKE 'http://www.' || ? || '/%'\n OR url LIKE 'https://www.' || ? || '/%'\n OR url = 'http://' || ?\n OR url = 'https://' || ?\n OR url = 'http://www.' || ?\n OR url = 'https://www.' || ?\n `);\n const row = stmt.get(\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n normalized,\n ) as { n: number };\n return row.n;\n}\n\nexport function getCacheStats(): CacheStats {\n const db = getDatabase();\n const row = db.prepare(`\n SELECT\n COUNT(*) as total_urls,\n COALESCE(SUM(LENGTH(markdown) + LENGTH(COALESCE(raw_html, ''))), 0) as total_bytes,\n MIN(fetched_at) as oldest,\n MAX(fetched_at) as newest\n FROM url_cache\n `).get() as { total_urls: number; total_bytes: number; oldest: string | null; newest: string | null };\n\n return {\n total_urls: row.total_urls,\n total_size_mb: Math.round((row.total_bytes / (1024 * 1024)) * 1e6) / 1e6,\n oldest: row.oldest ?? '',\n newest: row.newest ?? '',\n };\n}\n\n// --- Embedding store functions (Slice 22) ---\n\nexport function updateCacheEmbedding(\n url: string,\n embedding: Buffer,\n model: string,\n dims: number,\n): boolean {\n try {\n const db = getDatabase();\n let normalized: string;\n try {\n normalized = normalizeUrl(url);\n } catch {\n normalized = url;\n }\n\n const result = db.prepare(`\n UPDATE url_cache\n SET embedding = ?, embedding_model = ?, embedding_dims = ?, updated_at = datetime('now')\n WHERE normalized_url = ?\n `).run(embedding, model, dims, normalized);\n\n return result.changes > 0;\n } catch {\n return false;\n }\n}\n\nexport interface EmbeddingData {\n embedding: Buffer;\n model: string;\n dims: number;\n}\n\nexport function getEmbeddingForUrl(url: string, modelId?: string): EmbeddingData | null {\n try {\n const db = getDatabase();\n let normalized: string;\n try {\n normalized = normalizeUrl(url);\n } catch {\n normalized = url;\n }\n\n const row = db.prepare(`\n SELECT embedding, embedding_model, embedding_dims\n FROM url_cache\n WHERE (url = ? OR normalized_url = ?) AND embedding IS NOT NULL\n LIMIT 1\n `).get(url, normalized) as { embedding: Buffer; embedding_model: string; embedding_dims: number } | undefined;\n\n if (!row) return null;\n // Filter by modelId when caller wants only embeddings from the current\n // model; mismatched entries return null so they are treated as cache miss.\n if (modelId !== undefined && row.embedding_model !== modelId) return null;\n\n return {\n embedding: row.embedding,\n model: row.embedding_model,\n dims: row.embedding_dims,\n };\n } catch {\n return null;\n }\n}\n\nexport interface StoredEmbedding {\n normalizedUrl: string;\n embedding: Buffer;\n model: string;\n dims: number;\n}\n\nexport function getAllEmbeddings(modelId?: string): StoredEmbedding[] {\n try {\n const db = getDatabase();\n // Filter by modelId when provided so stale entries from a previous model\n // (different dim / vector space) are skipped — the in-memory vector index\n // requires matching dimensionality across all entries.\n const rows = modelId !== undefined\n ? db.prepare(`\n SELECT normalized_url, embedding, embedding_model, embedding_dims\n FROM url_cache\n WHERE embedding IS NOT NULL AND embedding_model = ?\n `).all(modelId) as Array<{\n normalized_url: string;\n embedding: Buffer;\n embedding_model: string;\n embedding_dims: number;\n }>\n : db.prepare(`\n SELECT normalized_url, embedding, embedding_model, embedding_dims\n FROM url_cache\n WHERE embedding IS NOT NULL\n `).all() as Array<{\n normalized_url: string;\n embedding: Buffer;\n embedding_model: string;\n embedding_dims: number;\n }>;\n\n return rows.map(r => ({\n normalizedUrl: r.normalized_url,\n embedding: r.embedding,\n model: r.embedding_model,\n dims: r.embedding_dims,\n }));\n } catch {\n return [];\n }\n}\n"],"mappings":"AAAA,SAAS,kBAAkB;AAC3B,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAG7B,MAAM,MAAM,aAAa,OAAO;AAUzB,SAAS,iBAAiB,GAAmB;AAClD,QAAM,UAAU,EAAE,KAAK;AACvB,MAAI,CAAC,QAAS,QAAO;AACrB,MAAI,SAAS,KAAK,OAAO,EAAG,QAAO;AACnC,QAAM,SAAS,QAAQ,MAAM,cAAc,KAAK,CAAC;AACjD,QAAM,WAAW,oBAAI,IAAI,CAAC,OAAO,MAAM,OAAO,KAAK,GAAG,CAAC;AACvD,SAAO,OAAO,IAAI,SAAO;AACvB,QAAI,IAAI,WAAW,GAAG,KAAK,IAAI,SAAS,GAAG,EAAG,QAAO;AACrD,QAAI,SAAS,IAAI,GAAG,EAAG,QAAO;AAC9B,QAAI,WAAW,KAAK,GAAG,EAAG,QAAO;AACjC,WAAO,IAAI,IAAI,QAAQ,MAAM,IAAI,CAAC;AAAA,EACpC,CAAC,EAAE,KAAK,GAAG;AACb;AAEA,MAAM,kBAAkB,oBAAI,IAAI;AAAA,EAC9B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,SAAS,aAAa,KAAqB;AAChD,QAAM,SAAS,IAAI,IAAI,GAAG;AAE1B,SAAO,WAAW,OAAO,SAAS,YAAY;AAC9C,SAAO,WAAW,OAAO,SAAS,YAAY,EAAE,QAAQ,UAAU,EAAE;AAEpE,aAAW,OAAO,CAAC,GAAG,OAAO,aAAa,KAAK,CAAC,GAAG;AACjD,QAAI,gBAAgB,IAAI,GAAG,KAAK,IAAI,WAAW,MAAM,GAAG;AACtD,aAAO,aAAa,OAAO,GAAG;AAAA,IAChC;AAAA,EACF;AAEA,SAAO,aAAa,KAAK;AAEzB,MAAI,SAAS,OAAO,SAAS;AAG7B,MAAI,OAAO,aAAa,OAAO,OAAO,SAAS,GAAG,GAAG;AACnD,aAAS,OAAO,MAAM,GAAG,EAAE;AAAA,EAC7B;AAEA,MAAI,OAAO,aAAa,OAAO,CAAC,OAAO,UAAU,CAAC,OAAO,MAAM;AAC7D,aAAS,OAAO,QAAQ,OAAO,EAAE;AAAA,EACnC;AAEA,SAAO;AACT;AAEA,SAAS,aAAa,MAAoB;AACxC,SAAO,KAAK,YAAY,EAAE,QAAQ,KAAK,GAAG,EAAE,QAAQ,WAAW,EAAE;AACnE;AAEO,SAAS,aAAa,QAAwB,YAAoC;AACvF,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,UAAM,SAAS,UAAU;AAEzB,UAAM,gBAAgB,aAAa,OAAO,YAAY,OAAO,GAAG;AAChE,UAAM,cAAc,WAAW,QAAQ,EAAE,OAAO,WAAW,QAAQ,EAAE,OAAO,KAAK;AAEjF,UAAM,MAAM,oBAAI,KAAK;AACrB,UAAM,YAAY,IAAI,KAAK,IAAI,QAAQ,IAAI,OAAO,kBAAkB,GAAI;AAExE,UAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,KAWvB;AAED,SAAK,IAAI;AAAA,MACP,KAAK,OAAO;AAAA,MACZ;AAAA,MACA,OAAO,WAAW;AAAA,MAClB,UAAU,WAAW;AAAA,MACrB,SAAS,OAAO;AAAA,MAChB,UAAU,KAAK,UAAU,WAAW,QAAQ;AAAA,MAC5C,OAAO,KAAK,UAAU,WAAW,KAAK;AAAA,MACtC,QAAQ,KAAK,UAAU,WAAW,MAAM;AAAA,MACxC,aAAa,OAAO;AAAA,MACpB,eAAe,WAAW;AAAA,MAC1B;AAAA,MACA,WAAW,aAAa,GAAG;AAAA,MAC3B,WAAW,aAAa,SAAS;AAAA,IACnC,CAAC;AAAA,EACH,SAAS,KAAK;AACZ,QAAI,KAAK,uBAAuB;AAAA,MAC9B,KAAK,OAAO;AAAA,MACZ,UAAU,OAAO;AAAA,MACjB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AAAA,EACH;AACF;AAmBA,SAAS,mBAAmB,KAA2B;AACrD,SAAO;AAAA,IACL,IAAI,IAAI;AAAA,IACR,KAAK,IAAI;AAAA,IACT,eAAe,IAAI;AAAA,IACnB,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,SAAS,IAAI;AAAA,IACb,UAAU,IAAI;AAAA,IACd,OAAO,IAAI;AAAA,IACX,QAAQ,IAAI;AAAA,IACZ,aAAa,IAAI;AAAA,IACjB,eAAe,IAAI;AAAA,IACnB,aAAa,IAAI;AAAA,IACjB,WAAW,IAAI;AAAA,IACf,WAAW,IAAI;AAAA,EACjB;AACF;AAEO,SAAS,iBAAiB,KAAmC;AAClE,QAAM,KAAK,YAAY;AACvB,QAAM,gBAAgB,aAAa,GAAG;AAEtC,QAAM,MAAM,GAAG,QAAQ;AAAA;AAAA,GAEtB,EAAE,IAAI,KAAK,aAAa;AAEzB,SAAO,MAAM,mBAAmB,GAAG,IAAI;AACzC;AAEO,SAAS,gCAAgC,eAA6C;AAC3F,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,aAAa;AACnB,SAAO,MAAM,mBAAmB,GAAG,IAAI;AACzC;AAEO,SAAS,wBAAwB,eAAsC;AAC5E,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,aAAa;AACnB,SAAO,KAAK,gBAAgB;AAC9B;AAEO,SAAS,4BAA4B,eAAsC;AAChF,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,aAAa;AACnB,SAAO,MAAM,IAAI,WAAW;AAC9B;AAEO,SAAS,UAAU,QAAgC;AACxD,MAAI,CAAC,OAAO,UAAW,QAAO;AAC9B,SAAO,IAAI,KAAK,OAAO,SAAS,EAAE,QAAQ,IAAI,KAAK,IAAI;AACzD;AAMO,SAAS,cACd,QACA,OAA2B,CAAC,GACS;AACrC,MAAI,CAAC,OAAO,UAAW,QAAO,EAAE,QAAQ,MAAM,OAAO,MAAM;AAC3D,QAAM,YAAY,IAAI,KAAK,OAAO,SAAS,EAAE,QAAQ;AACrD,QAAM,MAAM,KAAK,IAAI;AACrB,MAAI,aAAa,IAAK,QAAO,EAAE,QAAQ,MAAM,OAAO,MAAM;AAC1D,QAAM,cAAc,KAAK,mBAAmB,KAAK;AACjD,MAAI,MAAM,aAAa,WAAY,QAAO,EAAE,QAAQ,MAAM,OAAO,KAAK;AACtE,SAAO,EAAE,QAAQ,OAAO,OAAO,MAAM;AACvC;AAEO,SAAS,YAAY,OAAgC;AAC1D,QAAM,KAAK,YAAY;AAEvB,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAMvB,EAAE,IAAI,iBAAiB,KAAK,CAAC;AAE9B,SAAO,KAAK,IAAI,kBAAkB;AACpC;AAUO,SAAS,mBACd,OACA,SACA,aACM;AACN,QAAM,KAAK,YAAY;AACvB,QAAM,SAAS,UAAU;AAEzB,QAAM,YAAY,WAAW,QAAQ,EAAE,OAAO,MAAM,YAAY,EAAE,KAAK,CAAC,EAAE,OAAO,KAAK;AACtF,QAAM,MAAM,oBAAI,KAAK;AACrB,QAAM,YAAY,IAAI,KAAK,IAAI,QAAQ,IAAI,OAAO,iBAAiB,GAAI;AAEvE,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA,GAGvB;AAED,OAAK,IAAI;AAAA,IACP;AAAA,IACA;AAAA,IACA,SAAS,KAAK,UAAU,OAAO;AAAA,IAC/B,aAAa,KAAK,UAAU,WAAW;AAAA,IACvC,YAAY,aAAa,GAAG;AAAA,IAC5B,WAAW,aAAa,SAAS;AAAA,EACnC,CAAC;AACH;AAEO,SAAS,uBACd,OACA,OAA2B,CAAC,GACD;AAC3B,QAAM,KAAK,YAAY;AACvB,QAAM,YAAY,WAAW,QAAQ,EAAE,OAAO,MAAM,YAAY,EAAE,KAAK,CAAC,EAAE,OAAO,KAAK;AAEtF,QAAM,MAAM,GAAG;AAAA,IACb;AAAA,EACF,EAAE,IAAI,SAAS;AAIf,MAAI,CAAC,IAAK,QAAO;AAEjB,MAAI,IAAI,YAAY;AAClB,UAAM,YAAY,IAAI,KAAK,IAAI,UAAU,EAAE,QAAQ;AACnD,UAAM,MAAM,KAAK,IAAI;AACrB,QAAI,YAAY,KAAK;AACnB,YAAM,cAAc,KAAK,mBAAmB,KAAK;AACjD,UAAI,MAAM,YAAY,WAAY,QAAO;AACzC,aAAO;AAAA,QACL,OAAO,IAAI;AAAA,QACX,SAAS,KAAK,MAAM,IAAI,OAAO;AAAA,QAC/B,cAAc,KAAK,MAAM,IAAI,YAAY;AAAA,QACzC,aAAa,IAAI;AAAA,QACjB,OAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO,IAAI;AAAA,IACX,SAAS,KAAK,MAAM,IAAI,OAAO;AAAA,IAC/B,cAAc,KAAK,MAAM,IAAI,YAAY;AAAA,IACzC,aAAa,IAAI;AAAA,EACnB;AACF;AAEA,MAAM,yBAAyB;AAExB,SAAS,oBAAoB,SAKhB;AAClB,QAAM,KAAK,YAAY;AACvB,QAAM,aAAuB,CAAC;AAC9B,QAAM,SAAoB,CAAC;AAC3B,MAAI,aAAa;AAEjB,MAAI,QAAQ,OAAO;AACjB,iBAAa;AACb,eAAW,KAAK,uBAAuB;AACvC,WAAO,KAAK,iBAAiB,QAAQ,KAAK,CAAC;AAAA,EAC7C;AAEA,MAAI,QAAQ,YAAY;AACtB,eAAW,KAAK,iCAAiC;AACjD,WAAO,KAAK,QAAQ,UAAU;AAAA,EAChC;AAEA,MAAI,QAAQ,OAAO;AACjB,eAAW,KAAK,oCAAoC;AACpD,WAAO,KAAK,QAAQ,KAAK;AAAA,EAC3B;AAEA,QAAM,cAAc,WAAW,SAAS,IAAI,SAAS,WAAW,KAAK,OAAO,CAAC,KAAK;AAClF,QAAM,cAAc,QAAQ,QAAQ,kBAAkB;AACtD,QAAM,QAAQ,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,SAAS,sBAAsB,CAAC;AAE7E,QAAM,MAAM,2BAA2B,UAAU,IAAI,WAAW,IAAI,WAAW;AAC/E,QAAM,OAAO,GAAG,QAAQ,GAAG,EAAE,IAAI,GAAG,QAAQ,KAAK;AACjD,SAAO,KAAK,IAAI,kBAAkB;AACpC;AAQO,SAAS,gBAAgB,OAAe,OAAsD;AACnG,MAAI,CAAC,MAAM,KAAK,KAAK,SAAS,EAAG,QAAO,CAAC;AACzC,QAAM,KAAK,YAAY;AACvB,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAOvB,EAAE,IAAI,iBAAiB,KAAK,GAAG,KAAK;AACrC,SAAO,KAAK,IAAI,QAAM,EAAE,KAAK,EAAE,KAAK,OAAO,CAAC,EAAE,KAAK,EAAE;AACvD;AAEO,SAAS,kBAAkB,SAIvB;AACT,QAAM,KAAK,YAAY;AACvB,QAAM,aAAuB,CAAC;AAC9B,QAAM,SAAoB,CAAC;AAE3B,MAAI,QAAQ,OAAO;AACjB,eAAW;AAAA,MACT;AAAA,IACF;AACA,WAAO,KAAK,iBAAiB,QAAQ,KAAK,CAAC;AAAA,EAC7C;AAEA,MAAI,QAAQ,YAAY;AACtB,eAAW,KAAK,uBAAuB;AACvC,WAAO,KAAK,QAAQ,UAAU;AAAA,EAChC;AAEA,MAAI,QAAQ,OAAO;AACjB,eAAW,KAAK,0BAA0B;AAC1C,WAAO,KAAK,QAAQ,KAAK;AAAA,EAC3B;AAEA,QAAM,cAAc,WAAW,SAAS,IAAI,SAAS,WAAW,KAAK,OAAO,CAAC,KAAK;AAClF,QAAM,MAAM,yBAAyB,WAAW;AAChD,QAAM,SAAS,GAAG,QAAQ,GAAG,EAAE,IAAI,GAAG,MAAM;AAC5C,SAAO,OAAO;AAChB;AAKO,SAAS,yBAAyB,QAAwB;AAC/D,QAAM,KAAK,YAAY;AACvB,QAAM,aAAa,OAAO,YAAY,EAAE,QAAQ,UAAU,EAAE;AAC5D,QAAM,OAAO,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAUvB;AACD,QAAM,MAAM,KAAK;AAAA,IACf;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,SAAO,IAAI;AACb;AAEO,SAAS,gBAA4B;AAC1C,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,GAOtB,EAAE,IAAI;AAEP,SAAO;AAAA,IACL,YAAY,IAAI;AAAA,IAChB,eAAe,KAAK,MAAO,IAAI,eAAe,OAAO,QAAS,GAAG,IAAI;AAAA,IACrE,QAAQ,IAAI,UAAU;AAAA,IACtB,QAAQ,IAAI,UAAU;AAAA,EACxB;AACF;AAIO,SAAS,qBACd,KACA,WACA,OACA,MACS;AACT,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,QAAI;AACJ,QAAI;AACF,mBAAa,aAAa,GAAG;AAAA,IAC/B,QAAQ;AACN,mBAAa;AAAA,IACf;AAEA,UAAM,SAAS,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA,KAIzB,EAAE,IAAI,WAAW,OAAO,MAAM,UAAU;AAEzC,WAAO,OAAO,UAAU;AAAA,EAC1B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAQO,SAAS,mBAAmB,KAAa,SAAwC;AACtF,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,QAAI;AACJ,QAAI;AACF,mBAAa,aAAa,GAAG;AAAA,IAC/B,QAAQ;AACN,mBAAa;AAAA,IACf;AAEA,UAAM,MAAM,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA,KAKtB,EAAE,IAAI,KAAK,UAAU;AAEtB,QAAI,CAAC,IAAK,QAAO;AAGjB,QAAI,YAAY,UAAa,IAAI,oBAAoB,QAAS,QAAO;AAErE,WAAO;AAAA,MACL,WAAW,IAAI;AAAA,MACf,OAAO,IAAI;AAAA,MACX,MAAM,IAAI;AAAA,IACZ;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AASO,SAAS,iBAAiB,SAAqC;AACpE,MAAI;AACF,UAAM,KAAK,YAAY;AAIvB,UAAM,OAAO,YAAY,SACrB,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA,SAIV,EAAE,IAAI,OAAO,IAMd,GAAG,QAAQ;AAAA;AAAA;AAAA;AAAA,SAIV,EAAE,IAAI;AAOX,WAAO,KAAK,IAAI,QAAM;AAAA,MACpB,eAAe,EAAE;AAAA,MACjB,WAAW,EAAE;AAAA,MACb,OAAO,EAAE;AAAA,MACT,MAAM,EAAE;AAAA,IACV,EAAE;AAAA,EACJ,QAAQ;AACN,WAAO,CAAC;AAAA,EACV;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"doctor.d.ts","sourceRoot":"","sources":["../../src/cli/doctor.ts"],"names":[],"mappings":"AAyGA;;;;;GAKG;AACH,wBAAsB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAUhE"}
1
+ {"version":3,"file":"doctor.d.ts","sourceRoot":"","sources":["../../src/cli/doctor.ts"],"names":[],"mappings":"AA2HA;;;;;GAKG;AACH,wBAAsB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAUhE"}
@@ -1,6 +1,7 @@
1
1
  import { spawnSync } from "node:child_process";
2
2
  import { existsSync, readFileSync, readdirSync } from "node:fs";
3
3
  import { join } from "node:path";
4
+ import { chromium, firefox, webkit } from "playwright";
4
5
  import { getBootstrapState } from "../searxng/bootstrap.js";
5
6
  import { isProcessAlive } from "../searxng/process.js";
6
7
  import { getConfig } from "../config.js";
@@ -9,7 +10,8 @@ import { getEmbedProvider } from "../providers/embed-provider.js";
9
10
  import { initDatabase, closeDatabase } from "../cache/db.js";
10
11
  import { loadFeedConfig } from "../search/v1/rss/feed-config.js";
11
12
  import { isTelemetryEnabled } from "./telemetry.js";
12
- import { allProviders, providerEnvVar } from "../integrations/cloud/llm/select.js";
13
+ import { allProviders, providerEnvVar, selectProvider } from "../integrations/cloud/llm/select.js";
14
+ import { resolveModel, providerDefaultModel, providerModelEnvVar } from "../integrations/cloud/llm/model-select.js";
13
15
  import { setLogSuppression } from "../logger.js";
14
16
  function out(line = "") {
15
17
  process.stderr.write(`${line}
@@ -38,19 +40,27 @@ function checkPlaywright() {
38
40
  }
39
41
  } catch {
40
42
  }
41
- const probe = (browser) => {
42
- const r = spawnSync("npx", ["playwright", "install", "--dry-run", browser], { encoding: "utf-8", timeout: 5e3 });
43
- return r.status === 0 && !/is not installed/i.test(r.stdout + r.stderr);
43
+ const probeBrowser = (api) => {
44
+ try {
45
+ const exec = api.executablePath();
46
+ return { ok: !!exec && existsSync(exec), path: exec };
47
+ } catch {
48
+ return { ok: false };
49
+ }
44
50
  };
51
+ const chromiumProbe = probeBrowser(chromium);
52
+ const firefoxProbe = probeBrowser(firefox);
53
+ const webkitProbe = probeBrowser(webkit);
45
54
  return {
46
55
  installed,
47
56
  version,
48
57
  browsers: {
49
- chromium: probe("chromium"),
50
- chromiumHeadlessShell: probe("chromium-headless-shell"),
51
- firefox: probe("firefox"),
52
- webkit: probe("webkit")
53
- }
58
+ chromium: chromiumProbe.ok,
59
+ chromiumHeadlessShell: chromiumProbe.ok,
60
+ firefox: firefoxProbe.ok,
61
+ webkit: webkitProbe.ok
62
+ },
63
+ chromiumPath: chromiumProbe.path
54
64
  };
55
65
  }
56
66
  async function checkReranker() {
@@ -117,11 +127,12 @@ async function runDoctorInner(dataDir) {
117
127
  const pw = checkPlaywright();
118
128
  out("[wigolo doctor] Browser engine:");
119
129
  out(` Installation: ${pw.installed ? `installed${pw.version ? ` (v${pw.version})` : ""}` : "not installed"}`);
120
- out(` Browsers: chromium ${pw.browsers.chromium ? "OK" : "missing"} headless-shell ${pw.browsers.chromiumHeadlessShell ? "OK" : "missing"} firefox ${pw.browsers.firefox ? "OK" : "missing"} webkit ${pw.browsers.webkit ? "OK" : "missing"}`);
121
- if (!pw.installed || !pw.browsers.chromium || !pw.browsers.chromiumHeadlessShell) {
122
- if (!pw.browsers.chromiumHeadlessShell && pw.installed) {
123
- out(" Hint: run 'npx playwright install chromium-headless-shell' \u2014 JS-rendered pages will fail without it");
124
- }
130
+ out(` Browsers: chromium ${pw.browsers.chromium ? "OK" : "missing"} firefox ${pw.browsers.firefox ? "OK" : "missing"} webkit ${pw.browsers.webkit ? "OK" : "missing"}`);
131
+ if (pw.chromiumPath) {
132
+ out(` Chromium path: ${pw.chromiumPath}${pw.browsers.chromium ? "" : " (missing on disk)"}`);
133
+ }
134
+ if (!pw.browsers.chromium) {
135
+ out(" Hint: run 'npx playwright install chromium' \u2014 JS-rendered pages will fail without it");
125
136
  degraded = true;
126
137
  }
127
138
  out("");
@@ -140,17 +151,32 @@ async function runDoctorInner(dataDir) {
140
151
  out(` Embeddings model: not installed${embeddings.reason ? ` (${embeddings.reason})` : ""}`);
141
152
  }
142
153
  out("");
143
- out("[wigolo doctor] LLM fallback (extract):");
154
+ out("[wigolo doctor] LLM (extract / research / agent):");
144
155
  const cfg = getConfig();
156
+ const active = selectProvider(process.env);
145
157
  for (const p of allProviders()) {
146
158
  const envVar = providerEnvVar(p);
147
159
  const set = !!process.env[envVar];
160
+ const activeMark = p === active ? " <- active" : "";
148
161
  out(
149
- ` ${p.padEnd(10)} ${set ? "configured" : "no key"} (${envVar}${set ? "" : " unset"})`
162
+ ` ${p.padEnd(10)} ${set ? "configured" : "no key"} (${envVar}${set ? "" : " unset"})${activeMark}`
150
163
  );
164
+ if (set) {
165
+ const model = resolveModel(p, void 0, process.env);
166
+ const modelEnv = providerModelEnvVar(p);
167
+ const usingDefault = model === providerDefaultModel(p) && !process.env[modelEnv] && !process.env.WIGOLO_LLM_MODEL;
168
+ out(` model: ${model}${usingDefault ? " (default)" : ""}`);
169
+ }
151
170
  }
152
171
  if (cfg.llmProvider) {
153
- out(` override: WIGOLO_LLM_PROVIDER=${cfg.llmProvider}`);
172
+ if (cfg.llmProvider.startsWith("http://") || cfg.llmProvider.startsWith("https://")) {
173
+ out(` override: custom URL (${cfg.llmProvider})`);
174
+ } else {
175
+ out(` override: WIGOLO_LLM_PROVIDER=${cfg.llmProvider}`);
176
+ }
177
+ }
178
+ if (process.env.WIGOLO_LLM_MODEL) {
179
+ out(` WIGOLO_LLM_MODEL: ${process.env.WIGOLO_LLM_MODEL} (universal override)`);
154
180
  }
155
181
  out(` cache TTL: ${cfg.llmCacheTtlDays} days`);
156
182
  out(` per-request: ${cfg.llmMaxCallsPerRequest} call(s) max`);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/cli/doctor.ts"],"sourcesContent":["import { spawnSync } from 'node:child_process';\nimport { existsSync, readFileSync, readdirSync } from 'node:fs';\nimport { join } from 'node:path';\nimport { getBootstrapState, type BootstrapState } from '../searxng/bootstrap.js';\nimport { isProcessAlive } from '../searxng/process.js';\nimport { getConfig } from '../config.js';\nimport { getRerankProvider } from '../providers/rerank-provider.js';\nimport { getEmbedProvider } from '../providers/embed-provider.js';\nimport { initDatabase, closeDatabase } from '../cache/db.js';\nimport { loadFeedConfig } from '../search/v1/rss/feed-config.js';\nimport { isTelemetryEnabled } from './telemetry.js';\nimport { allProviders, providerEnvVar } from '../integrations/cloud/llm/select.js';\nimport { setLogSuppression } from '../logger.js';\n\nfunction out(line = ''): void { process.stderr.write(`${line}\\n`); }\n\nfunction checkPython(): { ok: boolean; version?: string } {\n const r = spawnSync('python3', ['--version'], { encoding: 'utf-8' });\n if (r.status !== 0 || r.error) return { ok: false };\n const match = (r.stdout || r.stderr || '').match(/Python (\\d+\\.\\d+\\.\\d+)/);\n return { ok: true, version: match?.[1] };\n}\n\nfunction checkDocker(): { ok: boolean; version?: string } {\n const r = spawnSync('docker', ['--version'], { encoding: 'utf-8' });\n if (r.status !== 0 || r.error) return { ok: false };\n return { ok: true, version: (r.stdout || '').trim() };\n}\n\nfunction checkPlaywright(): { installed: boolean; version?: string; browsers: { chromium: boolean; chromiumHeadlessShell: boolean; firefox: boolean; webkit: boolean } } {\n let installed = false;\n let version: string | undefined;\n try {\n const r = spawnSync('npx', ['playwright', '--version'], { encoding: 'utf-8', timeout: 5000 });\n if (r.status === 0) {\n installed = true;\n const m = r.stdout.match(/(\\d+\\.\\d+\\.\\d+)/);\n version = m?.[1];\n }\n } catch { /* ignore */ }\n // Browser detection — light check\n const probe = (browser: string): boolean => {\n const r = spawnSync('npx', ['playwright', 'install', '--dry-run', browser], { encoding: 'utf-8', timeout: 5000 });\n return r.status === 0 && !/is not installed/i.test(r.stdout + r.stderr);\n };\n return {\n installed,\n version,\n browsers: {\n chromium: probe('chromium'),\n chromiumHeadlessShell: probe('chromium-headless-shell'),\n firefox: probe('firefox'),\n webkit: probe('webkit'),\n },\n };\n}\n\nasync function checkReranker(\n): Promise<{ installed: boolean; modelId?: string; rerankMs?: number; reason?: string }> {\n try {\n const provider = await getRerankProvider();\n const docs = [\n 'React Server Components render on the server.',\n 'Next.js App Router uses RSC by default.',\n 'Bananas are a popular fruit.',\n 'TypeScript adds static types to JavaScript.',\n 'The capital of France is Paris.',\n ].map((text, i) => ({ id: String(i), text }));\n const t0 = Date.now();\n await provider.rerank('react server components', docs);\n const rerankMs = Date.now() - t0;\n return { installed: true, modelId: provider.modelId, rerankMs };\n } catch (err) {\n return { installed: false, reason: err instanceof Error ? err.message : 'rerank failed' };\n }\n}\n\nfunction checkFastembedCache(dataDir: string): { installed: boolean; reason?: string } {\n const cacheDir = join(dataDir, 'fastembed');\n if (!existsSync(cacheDir)) {\n return { installed: false, reason: 'cache dir missing — run `wigolo warmup --embeddings`' };\n }\n try {\n // First-run downloads create a model subdir with ONNX assets. Empty cache\n // dir means the model has not been fetched yet.\n const entries = readdirSync(cacheDir);\n if (entries.length === 0) {\n return { installed: false, reason: 'cache empty — run `wigolo warmup --embeddings`' };\n }\n return { installed: true };\n } catch (err) {\n return { installed: false, reason: err instanceof Error ? err.message : 'unknown error' };\n }\n}\n\nfunction humanRetry(nextRetryAt?: string): string {\n if (!nextRetryAt) return 'not scheduled';\n const when = new Date(nextRetryAt);\n const mins = Math.round((when.getTime() - Date.now()) / 60_000);\n if (mins < 0) return `${nextRetryAt} (ready now)`;\n if (mins < 60) return `${nextRetryAt} (in ${mins} minutes)`;\n const hrs = Math.round(mins / 60);\n return `${nextRetryAt} (in ${hrs} hours)`;\n}\n\n/**\n * Exit code contract:\n * - 0 when all required components OK, or only optional packages (content extractor/ML reranker) missing.\n * - 1 when any required component is degraded: Python missing, browser missing,\n * search engine bootstrap failed/no_runtime, or search engine process supposed to be up but isn't.\n */\nexport async function runDoctor(dataDir: string): Promise<number> {\n // Doctor produces its own human-readable diagnostic — suppress info/debug\n // logger noise from the modules it touches so the output stays clean.\n // Warnings and errors still come through.\n setLogSuppression('warn');\n try {\n return await runDoctorInner(dataDir);\n } finally {\n setLogSuppression(null);\n }\n}\n\nasync function runDoctorInner(dataDir: string): Promise<number> {\n let degraded = false;\n\n out(`[wigolo doctor] Data dir: ${dataDir}`);\n out('');\n\n const py = checkPython();\n const dk = checkDocker();\n out('[wigolo doctor] Runtime:');\n out(` Python 3: ${py.ok ? `available (${py.version ?? 'unknown'})` : 'not available'}`);\n out(` Docker: ${dk.ok ? `available (${dk.version})` : 'not available'}`);\n if (!py.ok && !dk.ok) degraded = true;\n\n out('');\n const pw = checkPlaywright();\n out('[wigolo doctor] Browser engine:');\n out(` Installation: ${pw.installed ? `installed${pw.version ? ` (v${pw.version})` : ''}` : 'not installed'}`);\n out(` Browsers: chromium ${pw.browsers.chromium ? 'OK' : 'missing'} headless-shell ${pw.browsers.chromiumHeadlessShell ? 'OK' : 'missing'} firefox ${pw.browsers.firefox ? 'OK' : 'missing'} webkit ${pw.browsers.webkit ? 'OK' : 'missing'}`);\n if (!pw.installed || !pw.browsers.chromium || !pw.browsers.chromiumHeadlessShell) {\n if (!pw.browsers.chromiumHeadlessShell && pw.installed) {\n out(\" Hint: run 'npx playwright install chromium-headless-shell' — JS-rendered pages will fail without it\");\n }\n degraded = true;\n }\n\n out('');\n const reranker = await checkReranker();\n const embeddings = checkFastembedCache(dataDir);\n out('[wigolo doctor] Optional components:');\n if (reranker.installed) {\n const timing = reranker.rerankMs !== undefined ? ` — 5-doc rerank ${reranker.rerankMs}ms` : '';\n out(` ML reranker: installed (${reranker.modelId})${timing}`);\n } else {\n out(` ML reranker: not installed${reranker.reason ? ` (${reranker.reason})` : ''}`);\n }\n if (embeddings.installed) {\n out(` Embeddings model: installed (fastembed BGE-small-en-v1.5)`);\n } else {\n out(` Embeddings model: not installed${embeddings.reason ? ` (${embeddings.reason})` : ''}`);\n }\n\n out('');\n out('[wigolo doctor] LLM fallback (extract):');\n const cfg = getConfig();\n for (const p of allProviders()) {\n const envVar = providerEnvVar(p);\n const set = !!process.env[envVar];\n out(\n ` ${p.padEnd(10)} ${set ? 'configured' : 'no key'} (${envVar}${set ? '' : ' unset'})`,\n );\n }\n if (cfg.llmProvider) {\n out(` override: WIGOLO_LLM_PROVIDER=${cfg.llmProvider}`);\n }\n out(` cache TTL: ${cfg.llmCacheTtlDays} days`);\n out(` per-request: ${cfg.llmMaxCallsPerRequest} call(s) max`);\n\n out('');\n const state = getBootstrapState(dataDir) as BootstrapState | null;\n out('[wigolo doctor] Search engine:');\n if (!state) {\n out(' status: not bootstrapped — run `npx @staticn0va/wigolo warmup`');\n degraded = true;\n } else if (state.status === 'ready') {\n out(` status: ready`);\n out(` path: ${state.searxngPath ?? 'unknown'}`);\n } else {\n out(` status: ${state.status}`);\n if (state.attempts !== undefined) out(` attempts: ${state.attempts} / 3`);\n if (state.lastAttemptAt) out(` lastAttemptAt: ${state.lastAttemptAt}`);\n if (state.nextRetryAt || state.status === 'failed') out(` nextRetryAt: ${humanRetry(state.nextRetryAt)}`);\n if (state.lastError?.command) out(` command: ${state.lastError.command}`);\n if (state.lastError?.exitCode !== undefined) out(` exit code: ${state.lastError.exitCode}`);\n if (state.lastError?.message) out(` message: ${state.lastError.message}`);\n if (state.lastError?.stderr) {\n out(' stderr:');\n for (const line of state.lastError.stderr.split('\\n').slice(0, 20)) out(` ${line}`);\n }\n degraded = true;\n }\n\n out('');\n const lockPath = join(dataDir, 'searxng.lock');\n if (existsSync(lockPath)) {\n try {\n const lock = JSON.parse(readFileSync(lockPath, 'utf-8')) as { pid?: number; port?: number };\n if (lock.pid && isProcessAlive(lock.pid)) {\n out(`[wigolo doctor] Search engine process: running (pid ${lock.pid}, port ${lock.port ?? '?'})`);\n } else {\n out('[wigolo doctor] Search engine process: stale lock (process exited) — will be cleaned on next start');\n }\n } catch {\n out('[wigolo doctor] Search engine process: lock file unparseable — will be cleaned on next start');\n }\n } else {\n out('[wigolo doctor] Search engine process: not running (starts on-demand with MCP server)');\n }\n\n if (state?.status === 'failed') {\n out('');\n out('[wigolo doctor] Recovery:');\n if (state.nextRetryAt) out(` - Wait until next auto-retry (${humanRetry(state.nextRetryAt)}), or`);\n out(` - Force retry now: npx @staticn0va/wigolo warmup --force`);\n }\n\n await checkV1Embeddings();\n await checkSqliteVec(dataDir);\n checkRssFeeds(dataDir);\n checkTelemetryStatus();\n\n out('');\n out(`[wigolo doctor] Overall: ${degraded ? 'DEGRADED' : 'OK'}`);\n return degraded ? 1 : 0;\n}\n\nasync function checkV1Embeddings(): Promise<void> {\n out('');\n out('[wigolo doctor] V1 embeddings:');\n try {\n const provider = await getEmbedProvider();\n out(` provider: ready (fastembed ${provider.modelId}, dim=${provider.dim})`);\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` provider: not ready (${msg.slice(0, 80)})`);\n }\n}\n\nasync function checkSqliteVec(dataDir: string): Promise<void> {\n out('');\n out('[wigolo doctor] V1 sqlite-vec:');\n let opened = false;\n try {\n const db = initDatabase(join(dataDir, 'wigolo.db'));\n opened = true;\n try {\n const row = db.prepare('SELECT vec_version() AS v').get() as { v?: string } | undefined;\n const v = row?.v ?? 'unknown';\n out(` extension: loaded (vec_version ${v})`);\n } catch {\n out(' extension: not loaded (run warmup to load on next start)');\n }\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` extension: (check failed: ${msg.slice(0, 80)})`);\n } finally {\n if (opened) {\n try { closeDatabase(); } catch { /* ignore */ }\n }\n }\n}\n\nfunction checkRssFeeds(dataDir: string): void {\n out('');\n out('[wigolo doctor] RSS feeds:');\n try {\n const { feeds } = loadFeedConfig({ dataDir });\n if (feeds.length === 0) {\n out(' feeds: none configured (set WIGOLO_RSS_FEEDS to opt in)');\n return;\n }\n\n let db: ReturnType<typeof initDatabase> | null = null;\n try {\n db = initDatabase(join(dataDir, 'wigolo.db'));\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` feeds: ${feeds.length} configured (db unreadable: ${msg.slice(0, 60)})`);\n return;\n }\n\n let stmt: ReturnType<typeof db.prepare> | null = null;\n try {\n stmt = db.prepare(\n 'SELECT COUNT(*) AS n, MAX(fetched_at) AS last_at FROM feed_items WHERE feed_url = ?',\n );\n } catch {\n // feed_items table missing — treat every feed as never polled.\n }\n\n try {\n const now = Date.now();\n for (const feed of feeds) {\n let line: string;\n if (!stmt) {\n line = ` ${feed.url} 0 items [never polled]`;\n } else {\n try {\n const row = stmt.get(feed.url) as { n?: number; last_at?: string | null } | undefined;\n const n = row?.n ?? 0;\n const lastAt = row?.last_at ?? null;\n if (!lastAt) {\n line = ` ${feed.url} ${n} items [never polled]`;\n } else {\n const ageMs = now - new Date(lastAt).getTime();\n const ageHr = ageMs / 3_600_000;\n const fresh = ageHr <= 24 ? 'fresh' : 'stale';\n const ageLabel = ageHr < 1\n ? `${Math.max(0, Math.round(ageMs / 60_000))}m ago`\n : `${Math.round(ageHr)}h ago`;\n const day = lastAt.slice(0, 10);\n line = ` ${feed.url} ${n} items, last fetched ${day} (${ageLabel}) [${fresh}]`;\n }\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n line = ` ${feed.url} (check failed: ${msg.slice(0, 60)})`;\n }\n }\n out(line);\n }\n } finally {\n try { closeDatabase(); } catch { /* ignore */ }\n }\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` (check failed: ${msg.slice(0, 80)})`);\n }\n}\n\nfunction checkTelemetryStatus(): void {\n out('');\n const state = isTelemetryEnabled() ? 'enabled' : 'disabled';\n out(`[wigolo doctor] Telemetry: opt-in ${state} (WIGOLO_TELEMETRY=1 to opt in)`);\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,YAAY,cAAc,mBAAmB;AACtD,SAAS,YAAY;AACrB,SAAS,yBAA8C;AACvD,SAAS,sBAAsB;AAC/B,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAClC,SAAS,wBAAwB;AACjC,SAAS,cAAc,qBAAqB;AAC5C,SAAS,sBAAsB;AAC/B,SAAS,0BAA0B;AACnC,SAAS,cAAc,sBAAsB;AAC7C,SAAS,yBAAyB;AAElC,SAAS,IAAI,OAAO,IAAU;AAAE,UAAQ,OAAO,MAAM,GAAG,IAAI;AAAA,CAAI;AAAG;AAEnE,SAAS,cAAiD;AACxD,QAAM,IAAI,UAAU,WAAW,CAAC,WAAW,GAAG,EAAE,UAAU,QAAQ,CAAC;AACnE,MAAI,EAAE,WAAW,KAAK,EAAE,MAAO,QAAO,EAAE,IAAI,MAAM;AAClD,QAAM,SAAS,EAAE,UAAU,EAAE,UAAU,IAAI,MAAM,wBAAwB;AACzE,SAAO,EAAE,IAAI,MAAM,SAAS,QAAQ,CAAC,EAAE;AACzC;AAEA,SAAS,cAAiD;AACxD,QAAM,IAAI,UAAU,UAAU,CAAC,WAAW,GAAG,EAAE,UAAU,QAAQ,CAAC;AAClE,MAAI,EAAE,WAAW,KAAK,EAAE,MAAO,QAAO,EAAE,IAAI,MAAM;AAClD,SAAO,EAAE,IAAI,MAAM,UAAU,EAAE,UAAU,IAAI,KAAK,EAAE;AACtD;AAEA,SAAS,kBAAgK;AACvK,MAAI,YAAY;AAChB,MAAI;AACJ,MAAI;AACF,UAAM,IAAI,UAAU,OAAO,CAAC,cAAc,WAAW,GAAG,EAAE,UAAU,SAAS,SAAS,IAAK,CAAC;AAC5F,QAAI,EAAE,WAAW,GAAG;AAClB,kBAAY;AACZ,YAAM,IAAI,EAAE,OAAO,MAAM,iBAAiB;AAC1C,gBAAU,IAAI,CAAC;AAAA,IACjB;AAAA,EACF,QAAQ;AAAA,EAAe;AAEvB,QAAM,QAAQ,CAAC,YAA6B;AAC1C,UAAM,IAAI,UAAU,OAAO,CAAC,cAAc,WAAW,aAAa,OAAO,GAAG,EAAE,UAAU,SAAS,SAAS,IAAK,CAAC;AAChH,WAAO,EAAE,WAAW,KAAK,CAAC,oBAAoB,KAAK,EAAE,SAAS,EAAE,MAAM;AAAA,EACxE;AACA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,UAAU;AAAA,MACR,UAAU,MAAM,UAAU;AAAA,MAC1B,uBAAuB,MAAM,yBAAyB;AAAA,MACtD,SAAS,MAAM,SAAS;AAAA,MACxB,QAAQ,MAAM,QAAQ;AAAA,IACxB;AAAA,EACF;AACF;AAEA,eAAe,gBAC0E;AACvF,MAAI;AACF,UAAM,WAAW,MAAM,kBAAkB;AACzC,UAAM,OAAO;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,IAAI,CAAC,MAAM,OAAO,EAAE,IAAI,OAAO,CAAC,GAAG,KAAK,EAAE;AAC5C,UAAM,KAAK,KAAK,IAAI;AACpB,UAAM,SAAS,OAAO,2BAA2B,IAAI;AACrD,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,WAAO,EAAE,WAAW,MAAM,SAAS,SAAS,SAAS,SAAS;AAAA,EAChE,SAAS,KAAK;AACZ,WAAO,EAAE,WAAW,OAAO,QAAQ,eAAe,QAAQ,IAAI,UAAU,gBAAgB;AAAA,EAC1F;AACF;AAEA,SAAS,oBAAoB,SAA0D;AACrF,QAAM,WAAW,KAAK,SAAS,WAAW;AAC1C,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,WAAO,EAAE,WAAW,OAAO,QAAQ,4DAAuD;AAAA,EAC5F;AACA,MAAI;AAGF,UAAM,UAAU,YAAY,QAAQ;AACpC,QAAI,QAAQ,WAAW,GAAG;AACxB,aAAO,EAAE,WAAW,OAAO,QAAQ,sDAAiD;AAAA,IACtF;AACA,WAAO,EAAE,WAAW,KAAK;AAAA,EAC3B,SAAS,KAAK;AACZ,WAAO,EAAE,WAAW,OAAO,QAAQ,eAAe,QAAQ,IAAI,UAAU,gBAAgB;AAAA,EAC1F;AACF;AAEA,SAAS,WAAW,aAA8B;AAChD,MAAI,CAAC,YAAa,QAAO;AACzB,QAAM,OAAO,IAAI,KAAK,WAAW;AACjC,QAAM,OAAO,KAAK,OAAO,KAAK,QAAQ,IAAI,KAAK,IAAI,KAAK,GAAM;AAC9D,MAAI,OAAO,EAAG,QAAO,GAAG,WAAW;AACnC,MAAI,OAAO,GAAI,QAAO,GAAG,WAAW,QAAQ,IAAI;AAChD,QAAM,MAAM,KAAK,MAAM,OAAO,EAAE;AAChC,SAAO,GAAG,WAAW,QAAQ,GAAG;AAClC;AAQA,eAAsB,UAAU,SAAkC;AAIhE,oBAAkB,MAAM;AACxB,MAAI;AACF,WAAO,MAAM,eAAe,OAAO;AAAA,EACrC,UAAE;AACA,sBAAkB,IAAI;AAAA,EACxB;AACF;AAEA,eAAe,eAAe,SAAkC;AAC9D,MAAI,WAAW;AAEf,MAAI,oCAAoC,OAAO,EAAE;AACjD,MAAI,EAAE;AAEN,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,YAAY;AACvB,MAAI,0BAA0B;AAC9B,MAAI,oBAAoB,GAAG,KAAK,cAAc,GAAG,WAAW,SAAS,MAAM,eAAe,EAAE;AAC5F,MAAI,oBAAoB,GAAG,KAAK,cAAc,GAAG,OAAO,MAAM,eAAe,EAAE;AAC/E,MAAI,CAAC,GAAG,MAAM,CAAC,GAAG,GAAI,YAAW;AAEjC,MAAI,EAAE;AACN,QAAM,KAAK,gBAAgB;AAC3B,MAAI,iCAAiC;AACrC,MAAI,oBAAoB,GAAG,YAAY,YAAY,GAAG,UAAU,MAAM,GAAG,OAAO,MAAM,EAAE,KAAK,eAAe,EAAE;AAC9G,MAAI,6BAA6B,GAAG,SAAS,WAAW,OAAO,SAAS,oBAAoB,GAAG,SAAS,wBAAwB,OAAO,SAAS,aAAa,GAAG,SAAS,UAAU,OAAO,SAAS,YAAY,GAAG,SAAS,SAAS,OAAO,SAAS,EAAE;AACtP,MAAI,CAAC,GAAG,aAAa,CAAC,GAAG,SAAS,YAAY,CAAC,GAAG,SAAS,uBAAuB;AAChF,QAAI,CAAC,GAAG,SAAS,yBAAyB,GAAG,WAAW;AACtD,UAAI,qHAAgH;AAAA,IACtH;AACA,eAAW;AAAA,EACb;AAEA,MAAI,EAAE;AACN,QAAM,WAAW,MAAM,cAAc;AACrC,QAAM,aAAa,oBAAoB,OAAO;AAC9C,MAAI,sCAAsC;AAC1C,MAAI,SAAS,WAAW;AACtB,UAAM,SAAS,SAAS,aAAa,SAAY,wBAAmB,SAAS,QAAQ,OAAO;AAC5F,QAAI,oCAAoC,SAAS,OAAO,IAAI,MAAM,EAAE;AAAA,EACtE,OAAO;AACL,QAAI,sCAAsC,SAAS,SAAS,KAAK,SAAS,MAAM,MAAM,EAAE,EAAE;AAAA,EAC5F;AACA,MAAI,WAAW,WAAW;AACxB,QAAI,+DAA+D;AAAA,EACrE,OAAO;AACL,QAAI,sCAAsC,WAAW,SAAS,KAAK,WAAW,MAAM,MAAM,EAAE,EAAE;AAAA,EAChG;AAEA,MAAI,EAAE;AACN,MAAI,yCAAyC;AAC7C,QAAM,MAAM,UAAU;AACtB,aAAW,KAAK,aAAa,GAAG;AAC9B,UAAM,SAAS,eAAe,CAAC;AAC/B,UAAM,MAAM,CAAC,CAAC,QAAQ,IAAI,MAAM;AAChC;AAAA,MACE,KAAK,EAAE,OAAO,EAAE,CAAC,IAAI,MAAM,eAAe,QAAQ,KAAK,MAAM,GAAG,MAAM,KAAK,QAAQ;AAAA,IACrF;AAAA,EACF;AACA,MAAI,IAAI,aAAa;AACnB,QAAI,sCAAsC,IAAI,WAAW,EAAE;AAAA,EAC7D;AACA,MAAI,kBAAkB,IAAI,eAAe,OAAO;AAChD,MAAI,kBAAkB,IAAI,qBAAqB,cAAc;AAE7D,MAAI,EAAE;AACN,QAAM,QAAQ,kBAAkB,OAAO;AACvC,MAAI,gCAAgC;AACpC,MAAI,CAAC,OAAO;AACV,QAAI,8EAAyE;AAC7E,eAAW;AAAA,EACb,WAAW,MAAM,WAAW,SAAS;AACnC,QAAI,wBAAwB;AAC5B,QAAI,oBAAoB,MAAM,eAAe,SAAS,EAAE;AAAA,EAC1D,OAAO;AACL,QAAI,oBAAoB,MAAM,MAAM,EAAE;AACtC,QAAI,MAAM,aAAa,OAAW,KAAI,oBAAoB,MAAM,QAAQ,MAAM;AAC9E,QAAI,MAAM,cAAe,KAAI,oBAAoB,MAAM,aAAa,EAAE;AACtE,QAAI,MAAM,eAAe,MAAM,WAAW,SAAU,KAAI,oBAAoB,WAAW,MAAM,WAAW,CAAC,EAAE;AAC3G,QAAI,MAAM,WAAW,QAAS,KAAI,oBAAoB,MAAM,UAAU,OAAO,EAAE;AAC/E,QAAI,MAAM,WAAW,aAAa,OAAW,KAAI,oBAAoB,MAAM,UAAU,QAAQ,EAAE;AAC/F,QAAI,MAAM,WAAW,QAAS,KAAI,oBAAoB,MAAM,UAAU,OAAO,EAAE;AAC/E,QAAI,MAAM,WAAW,QAAQ;AAC3B,UAAI,WAAW;AACf,iBAAW,QAAQ,MAAM,UAAU,OAAO,MAAM,IAAI,EAAE,MAAM,GAAG,EAAE,EAAG,KAAI,OAAO,IAAI,EAAE;AAAA,IACvF;AACA,eAAW;AAAA,EACb;AAEA,MAAI,EAAE;AACN,QAAM,WAAW,KAAK,SAAS,cAAc;AAC7C,MAAI,WAAW,QAAQ,GAAG;AACxB,QAAI;AACF,YAAM,OAAO,KAAK,MAAM,aAAa,UAAU,OAAO,CAAC;AACvD,UAAI,KAAK,OAAO,eAAe,KAAK,GAAG,GAAG;AACxC,YAAI,wDAAwD,KAAK,GAAG,UAAU,KAAK,QAAQ,GAAG,GAAG;AAAA,MACnG,OAAO;AACL,YAAI,0GAAqG;AAAA,MAC3G;AAAA,IACF,QAAQ;AACN,UAAI,oGAA+F;AAAA,IACrG;AAAA,EACF,OAAO;AACL,QAAI,wFAAwF;AAAA,EAC9F;AAEA,MAAI,OAAO,WAAW,UAAU;AAC9B,QAAI,EAAE;AACN,QAAI,2BAA2B;AAC/B,QAAI,MAAM,YAAa,KAAI,mCAAmC,WAAW,MAAM,WAAW,CAAC,OAAO;AAClG,QAAI,4DAA4D;AAAA,EAClE;AAEA,QAAM,kBAAkB;AACxB,QAAM,eAAe,OAAO;AAC5B,gBAAc,OAAO;AACrB,uBAAqB;AAErB,MAAI,EAAE;AACN,MAAI,4BAA4B,WAAW,aAAa,IAAI,EAAE;AAC9D,SAAO,WAAW,IAAI;AACxB;AAEA,eAAe,oBAAmC;AAChD,MAAI,EAAE;AACN,MAAI,gCAAgC;AACpC,MAAI;AACF,UAAM,WAAW,MAAM,iBAAiB;AACxC,QAAI,qCAAqC,SAAS,OAAO,SAAS,SAAS,GAAG,GAAG;AAAA,EACnF,SAAS,KAAK;AACZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,+BAA+B,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AAAA,EACxD;AACF;AAEA,eAAe,eAAe,SAAgC;AAC5D,MAAI,EAAE;AACN,MAAI,gCAAgC;AACpC,MAAI,SAAS;AACb,MAAI;AACF,UAAM,KAAK,aAAa,KAAK,SAAS,WAAW,CAAC;AAClD,aAAS;AACT,QAAI;AACF,YAAM,MAAM,GAAG,QAAQ,2BAA2B,EAAE,IAAI;AACxD,YAAM,IAAI,KAAK,KAAK;AACpB,UAAI,wCAAwC,CAAC,GAAG;AAAA,IAClD,QAAQ;AACN,UAAI,gEAAgE;AAAA,IACtE;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,mCAAmC,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AAAA,EAC5D,UAAE;AACA,QAAI,QAAQ;AACV,UAAI;AAAE,sBAAc;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IAChD;AAAA,EACF;AACF;AAEA,SAAS,cAAc,SAAuB;AAC5C,MAAI,EAAE;AACN,MAAI,4BAA4B;AAChC,MAAI;AACF,UAAM,EAAE,MAAM,IAAI,eAAe,EAAE,QAAQ,CAAC;AAC5C,QAAI,MAAM,WAAW,GAAG;AACtB,UAAI,mEAAmE;AACvE;AAAA,IACF;AAEA,QAAI,KAA6C;AACjD,QAAI;AACF,WAAK,aAAa,KAAK,SAAS,WAAW,CAAC;AAAA,IAC9C,SAAS,KAAK;AACZ,YAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,UAAI,oBAAoB,MAAM,MAAM,+BAA+B,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AACtF;AAAA,IACF;AAEA,QAAI,OAA6C;AACjD,QAAI;AACF,aAAO,GAAG;AAAA,QACR;AAAA,MACF;AAAA,IACF,QAAQ;AAAA,IAER;AAEA,QAAI;AACF,YAAM,MAAM,KAAK,IAAI;AACrB,iBAAW,QAAQ,OAAO;AACxB,YAAI;AACJ,YAAI,CAAC,MAAM;AACT,iBAAO,KAAK,KAAK,GAAG;AAAA,QACtB,OAAO;AACL,cAAI;AACF,kBAAM,MAAM,KAAK,IAAI,KAAK,GAAG;AAC7B,kBAAM,IAAI,KAAK,KAAK;AACpB,kBAAM,SAAS,KAAK,WAAW;AAC/B,gBAAI,CAAC,QAAQ;AACX,qBAAO,KAAK,KAAK,GAAG,KAAK,CAAC;AAAA,YAC5B,OAAO;AACL,oBAAM,QAAQ,MAAM,IAAI,KAAK,MAAM,EAAE,QAAQ;AAC7C,oBAAM,QAAQ,QAAQ;AACtB,oBAAM,QAAQ,SAAS,KAAK,UAAU;AACtC,oBAAM,WAAW,QAAQ,IACrB,GAAG,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,GAAM,CAAC,CAAC,UAC1C,GAAG,KAAK,MAAM,KAAK,CAAC;AACxB,oBAAM,MAAM,OAAO,MAAM,GAAG,EAAE;AAC9B,qBAAO,KAAK,KAAK,GAAG,KAAK,CAAC,wBAAwB,GAAG,KAAK,QAAQ,MAAM,KAAK;AAAA,YAC/E;AAAA,UACF,SAAS,KAAK;AACZ,kBAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,mBAAO,KAAK,KAAK,GAAG,oBAAoB,IAAI,MAAM,GAAG,EAAE,CAAC;AAAA,UAC1D;AAAA,QACF;AACA,YAAI,IAAI;AAAA,MACV;AAAA,IACF,UAAE;AACA,UAAI;AAAE,sBAAc;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IAChD;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,oBAAoB,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AAAA,EAC7C;AACF;AAEA,SAAS,uBAA6B;AACpC,MAAI,EAAE;AACN,QAAM,QAAQ,mBAAmB,IAAI,YAAY;AACjD,MAAI,qCAAqC,KAAK,iCAAiC;AACjF;","names":[]}
1
+ {"version":3,"sources":["../../src/cli/doctor.ts"],"sourcesContent":["import { spawnSync } from 'node:child_process';\nimport { existsSync, readFileSync, readdirSync } from 'node:fs';\nimport { join } from 'node:path';\nimport { chromium, firefox, webkit } from 'playwright';\nimport { getBootstrapState, type BootstrapState } from '../searxng/bootstrap.js';\nimport { isProcessAlive } from '../searxng/process.js';\nimport { getConfig } from '../config.js';\nimport { getRerankProvider } from '../providers/rerank-provider.js';\nimport { getEmbedProvider } from '../providers/embed-provider.js';\nimport { initDatabase, closeDatabase } from '../cache/db.js';\nimport { loadFeedConfig } from '../search/v1/rss/feed-config.js';\nimport { isTelemetryEnabled } from './telemetry.js';\nimport { allProviders, providerEnvVar, selectProvider } from '../integrations/cloud/llm/select.js';\nimport { resolveModel, providerDefaultModel, providerModelEnvVar } from '../integrations/cloud/llm/model-select.js';\nimport { setLogSuppression } from '../logger.js';\n\nfunction out(line = ''): void { process.stderr.write(`${line}\\n`); }\n\nfunction checkPython(): { ok: boolean; version?: string } {\n const r = spawnSync('python3', ['--version'], { encoding: 'utf-8' });\n if (r.status !== 0 || r.error) return { ok: false };\n const match = (r.stdout || r.stderr || '').match(/Python (\\d+\\.\\d+\\.\\d+)/);\n return { ok: true, version: match?.[1] };\n}\n\nfunction checkDocker(): { ok: boolean; version?: string } {\n const r = spawnSync('docker', ['--version'], { encoding: 'utf-8' });\n if (r.status !== 0 || r.error) return { ok: false };\n return { ok: true, version: (r.stdout || '').trim() };\n}\n\nfunction checkPlaywright(): { installed: boolean; version?: string; browsers: { chromium: boolean; chromiumHeadlessShell: boolean; firefox: boolean; webkit: boolean }; chromiumPath?: string } {\n let installed = false;\n let version: string | undefined;\n try {\n const r = spawnSync('npx', ['playwright', '--version'], { encoding: 'utf-8', timeout: 5000 });\n if (r.status === 0) {\n installed = true;\n const m = r.stdout.match(/(\\d+\\.\\d+\\.\\d+)/);\n version = m?.[1];\n }\n } catch { /* ignore */ }\n\n // Probe browser readiness by resolving the bundled Playwright's actual\n // executable path and checking the file on disk. This matches what fetch\n // uses via chromium.launch(), so doctor cannot lie about parity.\n const probeBrowser = (api: { executablePath(): string }): { ok: boolean; path?: string } => {\n try {\n const exec = api.executablePath();\n return { ok: !!exec && existsSync(exec), path: exec };\n } catch {\n return { ok: false };\n }\n };\n\n const chromiumProbe = probeBrowser(chromium);\n const firefoxProbe = probeBrowser(firefox);\n const webkitProbe = probeBrowser(webkit);\n\n // headless-shell uses chromium binary or a sibling; presence implied when\n // chromium ok. If a user explicitly needs the shell, fetch will surface\n // playwright_not_installed regardless.\n return {\n installed,\n version,\n browsers: {\n chromium: chromiumProbe.ok,\n chromiumHeadlessShell: chromiumProbe.ok,\n firefox: firefoxProbe.ok,\n webkit: webkitProbe.ok,\n },\n chromiumPath: chromiumProbe.path,\n };\n}\n\nasync function checkReranker(\n): Promise<{ installed: boolean; modelId?: string; rerankMs?: number; reason?: string }> {\n try {\n const provider = await getRerankProvider();\n const docs = [\n 'React Server Components render on the server.',\n 'Next.js App Router uses RSC by default.',\n 'Bananas are a popular fruit.',\n 'TypeScript adds static types to JavaScript.',\n 'The capital of France is Paris.',\n ].map((text, i) => ({ id: String(i), text }));\n const t0 = Date.now();\n await provider.rerank('react server components', docs);\n const rerankMs = Date.now() - t0;\n return { installed: true, modelId: provider.modelId, rerankMs };\n } catch (err) {\n return { installed: false, reason: err instanceof Error ? err.message : 'rerank failed' };\n }\n}\n\nfunction checkFastembedCache(dataDir: string): { installed: boolean; reason?: string } {\n const cacheDir = join(dataDir, 'fastembed');\n if (!existsSync(cacheDir)) {\n return { installed: false, reason: 'cache dir missing — run `wigolo warmup --embeddings`' };\n }\n try {\n // First-run downloads create a model subdir with ONNX assets. Empty cache\n // dir means the model has not been fetched yet.\n const entries = readdirSync(cacheDir);\n if (entries.length === 0) {\n return { installed: false, reason: 'cache empty — run `wigolo warmup --embeddings`' };\n }\n return { installed: true };\n } catch (err) {\n return { installed: false, reason: err instanceof Error ? err.message : 'unknown error' };\n }\n}\n\nfunction humanRetry(nextRetryAt?: string): string {\n if (!nextRetryAt) return 'not scheduled';\n const when = new Date(nextRetryAt);\n const mins = Math.round((when.getTime() - Date.now()) / 60_000);\n if (mins < 0) return `${nextRetryAt} (ready now)`;\n if (mins < 60) return `${nextRetryAt} (in ${mins} minutes)`;\n const hrs = Math.round(mins / 60);\n return `${nextRetryAt} (in ${hrs} hours)`;\n}\n\n/**\n * Exit code contract:\n * - 0 when all required components OK, or only optional packages (content extractor/ML reranker) missing.\n * - 1 when any required component is degraded: Python missing, browser missing,\n * search engine bootstrap failed/no_runtime, or search engine process supposed to be up but isn't.\n */\nexport async function runDoctor(dataDir: string): Promise<number> {\n // Doctor produces its own human-readable diagnostic — suppress info/debug\n // logger noise from the modules it touches so the output stays clean.\n // Warnings and errors still come through.\n setLogSuppression('warn');\n try {\n return await runDoctorInner(dataDir);\n } finally {\n setLogSuppression(null);\n }\n}\n\nasync function runDoctorInner(dataDir: string): Promise<number> {\n let degraded = false;\n\n out(`[wigolo doctor] Data dir: ${dataDir}`);\n out('');\n\n const py = checkPython();\n const dk = checkDocker();\n out('[wigolo doctor] Runtime:');\n out(` Python 3: ${py.ok ? `available (${py.version ?? 'unknown'})` : 'not available'}`);\n out(` Docker: ${dk.ok ? `available (${dk.version})` : 'not available'}`);\n if (!py.ok && !dk.ok) degraded = true;\n\n out('');\n const pw = checkPlaywright();\n out('[wigolo doctor] Browser engine:');\n out(` Installation: ${pw.installed ? `installed${pw.version ? ` (v${pw.version})` : ''}` : 'not installed'}`);\n out(` Browsers: chromium ${pw.browsers.chromium ? 'OK' : 'missing'} firefox ${pw.browsers.firefox ? 'OK' : 'missing'} webkit ${pw.browsers.webkit ? 'OK' : 'missing'}`);\n if (pw.chromiumPath) {\n out(` Chromium path: ${pw.chromiumPath}${pw.browsers.chromium ? '' : ' (missing on disk)'}`);\n }\n if (!pw.browsers.chromium) {\n out(\" Hint: run 'npx playwright install chromium' — JS-rendered pages will fail without it\");\n degraded = true;\n }\n\n out('');\n const reranker = await checkReranker();\n const embeddings = checkFastembedCache(dataDir);\n out('[wigolo doctor] Optional components:');\n if (reranker.installed) {\n const timing = reranker.rerankMs !== undefined ? ` — 5-doc rerank ${reranker.rerankMs}ms` : '';\n out(` ML reranker: installed (${reranker.modelId})${timing}`);\n } else {\n out(` ML reranker: not installed${reranker.reason ? ` (${reranker.reason})` : ''}`);\n }\n if (embeddings.installed) {\n out(` Embeddings model: installed (fastembed BGE-small-en-v1.5)`);\n } else {\n out(` Embeddings model: not installed${embeddings.reason ? ` (${embeddings.reason})` : ''}`);\n }\n\n out('');\n out('[wigolo doctor] LLM (extract / research / agent):');\n const cfg = getConfig();\n const active = selectProvider(process.env);\n for (const p of allProviders()) {\n const envVar = providerEnvVar(p);\n const set = !!process.env[envVar];\n const activeMark = p === active ? ' <- active' : '';\n out(\n ` ${p.padEnd(10)} ${set ? 'configured' : 'no key'} (${envVar}${set ? '' : ' unset'})${activeMark}`,\n );\n if (set) {\n const model = resolveModel(p, undefined, process.env);\n const modelEnv = providerModelEnvVar(p);\n const usingDefault = model === providerDefaultModel(p) && !process.env[modelEnv] && !process.env.WIGOLO_LLM_MODEL;\n out(` model: ${model}${usingDefault ? ' (default)' : ''}`);\n }\n }\n if (cfg.llmProvider) {\n if (cfg.llmProvider.startsWith('http://') || cfg.llmProvider.startsWith('https://')) {\n out(` override: custom URL (${cfg.llmProvider})`);\n } else {\n out(` override: WIGOLO_LLM_PROVIDER=${cfg.llmProvider}`);\n }\n }\n if (process.env.WIGOLO_LLM_MODEL) {\n out(` WIGOLO_LLM_MODEL: ${process.env.WIGOLO_LLM_MODEL} (universal override)`);\n }\n out(` cache TTL: ${cfg.llmCacheTtlDays} days`);\n out(` per-request: ${cfg.llmMaxCallsPerRequest} call(s) max`);\n\n out('');\n const state = getBootstrapState(dataDir) as BootstrapState | null;\n out('[wigolo doctor] Search engine:');\n if (!state) {\n out(' status: not bootstrapped — run `npx @staticn0va/wigolo warmup`');\n degraded = true;\n } else if (state.status === 'ready') {\n out(` status: ready`);\n out(` path: ${state.searxngPath ?? 'unknown'}`);\n } else {\n out(` status: ${state.status}`);\n if (state.attempts !== undefined) out(` attempts: ${state.attempts} / 3`);\n if (state.lastAttemptAt) out(` lastAttemptAt: ${state.lastAttemptAt}`);\n if (state.nextRetryAt || state.status === 'failed') out(` nextRetryAt: ${humanRetry(state.nextRetryAt)}`);\n if (state.lastError?.command) out(` command: ${state.lastError.command}`);\n if (state.lastError?.exitCode !== undefined) out(` exit code: ${state.lastError.exitCode}`);\n if (state.lastError?.message) out(` message: ${state.lastError.message}`);\n if (state.lastError?.stderr) {\n out(' stderr:');\n for (const line of state.lastError.stderr.split('\\n').slice(0, 20)) out(` ${line}`);\n }\n degraded = true;\n }\n\n out('');\n const lockPath = join(dataDir, 'searxng.lock');\n if (existsSync(lockPath)) {\n try {\n const lock = JSON.parse(readFileSync(lockPath, 'utf-8')) as { pid?: number; port?: number };\n if (lock.pid && isProcessAlive(lock.pid)) {\n out(`[wigolo doctor] Search engine process: running (pid ${lock.pid}, port ${lock.port ?? '?'})`);\n } else {\n out('[wigolo doctor] Search engine process: stale lock (process exited) — will be cleaned on next start');\n }\n } catch {\n out('[wigolo doctor] Search engine process: lock file unparseable — will be cleaned on next start');\n }\n } else {\n out('[wigolo doctor] Search engine process: not running (starts on-demand with MCP server)');\n }\n\n if (state?.status === 'failed') {\n out('');\n out('[wigolo doctor] Recovery:');\n if (state.nextRetryAt) out(` - Wait until next auto-retry (${humanRetry(state.nextRetryAt)}), or`);\n out(` - Force retry now: npx @staticn0va/wigolo warmup --force`);\n }\n\n await checkV1Embeddings();\n await checkSqliteVec(dataDir);\n checkRssFeeds(dataDir);\n checkTelemetryStatus();\n\n out('');\n out(`[wigolo doctor] Overall: ${degraded ? 'DEGRADED' : 'OK'}`);\n return degraded ? 1 : 0;\n}\n\nasync function checkV1Embeddings(): Promise<void> {\n out('');\n out('[wigolo doctor] V1 embeddings:');\n try {\n const provider = await getEmbedProvider();\n out(` provider: ready (fastembed ${provider.modelId}, dim=${provider.dim})`);\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` provider: not ready (${msg.slice(0, 80)})`);\n }\n}\n\nasync function checkSqliteVec(dataDir: string): Promise<void> {\n out('');\n out('[wigolo doctor] V1 sqlite-vec:');\n let opened = false;\n try {\n const db = initDatabase(join(dataDir, 'wigolo.db'));\n opened = true;\n try {\n const row = db.prepare('SELECT vec_version() AS v').get() as { v?: string } | undefined;\n const v = row?.v ?? 'unknown';\n out(` extension: loaded (vec_version ${v})`);\n } catch {\n out(' extension: not loaded (run warmup to load on next start)');\n }\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` extension: (check failed: ${msg.slice(0, 80)})`);\n } finally {\n if (opened) {\n try { closeDatabase(); } catch { /* ignore */ }\n }\n }\n}\n\nfunction checkRssFeeds(dataDir: string): void {\n out('');\n out('[wigolo doctor] RSS feeds:');\n try {\n const { feeds } = loadFeedConfig({ dataDir });\n if (feeds.length === 0) {\n out(' feeds: none configured (set WIGOLO_RSS_FEEDS to opt in)');\n return;\n }\n\n let db: ReturnType<typeof initDatabase> | null = null;\n try {\n db = initDatabase(join(dataDir, 'wigolo.db'));\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` feeds: ${feeds.length} configured (db unreadable: ${msg.slice(0, 60)})`);\n return;\n }\n\n let stmt: ReturnType<typeof db.prepare> | null = null;\n try {\n stmt = db.prepare(\n 'SELECT COUNT(*) AS n, MAX(fetched_at) AS last_at FROM feed_items WHERE feed_url = ?',\n );\n } catch {\n // feed_items table missing — treat every feed as never polled.\n }\n\n try {\n const now = Date.now();\n for (const feed of feeds) {\n let line: string;\n if (!stmt) {\n line = ` ${feed.url} 0 items [never polled]`;\n } else {\n try {\n const row = stmt.get(feed.url) as { n?: number; last_at?: string | null } | undefined;\n const n = row?.n ?? 0;\n const lastAt = row?.last_at ?? null;\n if (!lastAt) {\n line = ` ${feed.url} ${n} items [never polled]`;\n } else {\n const ageMs = now - new Date(lastAt).getTime();\n const ageHr = ageMs / 3_600_000;\n const fresh = ageHr <= 24 ? 'fresh' : 'stale';\n const ageLabel = ageHr < 1\n ? `${Math.max(0, Math.round(ageMs / 60_000))}m ago`\n : `${Math.round(ageHr)}h ago`;\n const day = lastAt.slice(0, 10);\n line = ` ${feed.url} ${n} items, last fetched ${day} (${ageLabel}) [${fresh}]`;\n }\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n line = ` ${feed.url} (check failed: ${msg.slice(0, 60)})`;\n }\n }\n out(line);\n }\n } finally {\n try { closeDatabase(); } catch { /* ignore */ }\n }\n } catch (err) {\n const msg = err instanceof Error ? err.message : String(err);\n out(` (check failed: ${msg.slice(0, 80)})`);\n }\n}\n\nfunction checkTelemetryStatus(): void {\n out('');\n const state = isTelemetryEnabled() ? 'enabled' : 'disabled';\n out(`[wigolo doctor] Telemetry: opt-in ${state} (WIGOLO_TELEMETRY=1 to opt in)`);\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,YAAY,cAAc,mBAAmB;AACtD,SAAS,YAAY;AACrB,SAAS,UAAU,SAAS,cAAc;AAC1C,SAAS,yBAA8C;AACvD,SAAS,sBAAsB;AAC/B,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAClC,SAAS,wBAAwB;AACjC,SAAS,cAAc,qBAAqB;AAC5C,SAAS,sBAAsB;AAC/B,SAAS,0BAA0B;AACnC,SAAS,cAAc,gBAAgB,sBAAsB;AAC7D,SAAS,cAAc,sBAAsB,2BAA2B;AACxE,SAAS,yBAAyB;AAElC,SAAS,IAAI,OAAO,IAAU;AAAE,UAAQ,OAAO,MAAM,GAAG,IAAI;AAAA,CAAI;AAAG;AAEnE,SAAS,cAAiD;AACxD,QAAM,IAAI,UAAU,WAAW,CAAC,WAAW,GAAG,EAAE,UAAU,QAAQ,CAAC;AACnE,MAAI,EAAE,WAAW,KAAK,EAAE,MAAO,QAAO,EAAE,IAAI,MAAM;AAClD,QAAM,SAAS,EAAE,UAAU,EAAE,UAAU,IAAI,MAAM,wBAAwB;AACzE,SAAO,EAAE,IAAI,MAAM,SAAS,QAAQ,CAAC,EAAE;AACzC;AAEA,SAAS,cAAiD;AACxD,QAAM,IAAI,UAAU,UAAU,CAAC,WAAW,GAAG,EAAE,UAAU,QAAQ,CAAC;AAClE,MAAI,EAAE,WAAW,KAAK,EAAE,MAAO,QAAO,EAAE,IAAI,MAAM;AAClD,SAAO,EAAE,IAAI,MAAM,UAAU,EAAE,UAAU,IAAI,KAAK,EAAE;AACtD;AAEA,SAAS,kBAAuL;AAC9L,MAAI,YAAY;AAChB,MAAI;AACJ,MAAI;AACF,UAAM,IAAI,UAAU,OAAO,CAAC,cAAc,WAAW,GAAG,EAAE,UAAU,SAAS,SAAS,IAAK,CAAC;AAC5F,QAAI,EAAE,WAAW,GAAG;AAClB,kBAAY;AACZ,YAAM,IAAI,EAAE,OAAO,MAAM,iBAAiB;AAC1C,gBAAU,IAAI,CAAC;AAAA,IACjB;AAAA,EACF,QAAQ;AAAA,EAAe;AAKvB,QAAM,eAAe,CAAC,QAAsE;AAC1F,QAAI;AACF,YAAM,OAAO,IAAI,eAAe;AAChC,aAAO,EAAE,IAAI,CAAC,CAAC,QAAQ,WAAW,IAAI,GAAG,MAAM,KAAK;AAAA,IACtD,QAAQ;AACN,aAAO,EAAE,IAAI,MAAM;AAAA,IACrB;AAAA,EACF;AAEA,QAAM,gBAAgB,aAAa,QAAQ;AAC3C,QAAM,eAAe,aAAa,OAAO;AACzC,QAAM,cAAc,aAAa,MAAM;AAKvC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,UAAU;AAAA,MACR,UAAU,cAAc;AAAA,MACxB,uBAAuB,cAAc;AAAA,MACrC,SAAS,aAAa;AAAA,MACtB,QAAQ,YAAY;AAAA,IACtB;AAAA,IACA,cAAc,cAAc;AAAA,EAC9B;AACF;AAEA,eAAe,gBAC0E;AACvF,MAAI;AACF,UAAM,WAAW,MAAM,kBAAkB;AACzC,UAAM,OAAO;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,IAAI,CAAC,MAAM,OAAO,EAAE,IAAI,OAAO,CAAC,GAAG,KAAK,EAAE;AAC5C,UAAM,KAAK,KAAK,IAAI;AACpB,UAAM,SAAS,OAAO,2BAA2B,IAAI;AACrD,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,WAAO,EAAE,WAAW,MAAM,SAAS,SAAS,SAAS,SAAS;AAAA,EAChE,SAAS,KAAK;AACZ,WAAO,EAAE,WAAW,OAAO,QAAQ,eAAe,QAAQ,IAAI,UAAU,gBAAgB;AAAA,EAC1F;AACF;AAEA,SAAS,oBAAoB,SAA0D;AACrF,QAAM,WAAW,KAAK,SAAS,WAAW;AAC1C,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,WAAO,EAAE,WAAW,OAAO,QAAQ,4DAAuD;AAAA,EAC5F;AACA,MAAI;AAGF,UAAM,UAAU,YAAY,QAAQ;AACpC,QAAI,QAAQ,WAAW,GAAG;AACxB,aAAO,EAAE,WAAW,OAAO,QAAQ,sDAAiD;AAAA,IACtF;AACA,WAAO,EAAE,WAAW,KAAK;AAAA,EAC3B,SAAS,KAAK;AACZ,WAAO,EAAE,WAAW,OAAO,QAAQ,eAAe,QAAQ,IAAI,UAAU,gBAAgB;AAAA,EAC1F;AACF;AAEA,SAAS,WAAW,aAA8B;AAChD,MAAI,CAAC,YAAa,QAAO;AACzB,QAAM,OAAO,IAAI,KAAK,WAAW;AACjC,QAAM,OAAO,KAAK,OAAO,KAAK,QAAQ,IAAI,KAAK,IAAI,KAAK,GAAM;AAC9D,MAAI,OAAO,EAAG,QAAO,GAAG,WAAW;AACnC,MAAI,OAAO,GAAI,QAAO,GAAG,WAAW,QAAQ,IAAI;AAChD,QAAM,MAAM,KAAK,MAAM,OAAO,EAAE;AAChC,SAAO,GAAG,WAAW,QAAQ,GAAG;AAClC;AAQA,eAAsB,UAAU,SAAkC;AAIhE,oBAAkB,MAAM;AACxB,MAAI;AACF,WAAO,MAAM,eAAe,OAAO;AAAA,EACrC,UAAE;AACA,sBAAkB,IAAI;AAAA,EACxB;AACF;AAEA,eAAe,eAAe,SAAkC;AAC9D,MAAI,WAAW;AAEf,MAAI,oCAAoC,OAAO,EAAE;AACjD,MAAI,EAAE;AAEN,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,YAAY;AACvB,MAAI,0BAA0B;AAC9B,MAAI,oBAAoB,GAAG,KAAK,cAAc,GAAG,WAAW,SAAS,MAAM,eAAe,EAAE;AAC5F,MAAI,oBAAoB,GAAG,KAAK,cAAc,GAAG,OAAO,MAAM,eAAe,EAAE;AAC/E,MAAI,CAAC,GAAG,MAAM,CAAC,GAAG,GAAI,YAAW;AAEjC,MAAI,EAAE;AACN,QAAM,KAAK,gBAAgB;AAC3B,MAAI,iCAAiC;AACrC,MAAI,oBAAoB,GAAG,YAAY,YAAY,GAAG,UAAU,MAAM,GAAG,OAAO,MAAM,EAAE,KAAK,eAAe,EAAE;AAC9G,MAAI,6BAA6B,GAAG,SAAS,WAAW,OAAO,SAAS,aAAa,GAAG,SAAS,UAAU,OAAO,SAAS,YAAY,GAAG,SAAS,SAAS,OAAO,SAAS,EAAE;AAC9K,MAAI,GAAG,cAAc;AACnB,QAAI,oBAAoB,GAAG,YAAY,GAAG,GAAG,SAAS,WAAW,KAAK,oBAAoB,EAAE;AAAA,EAC9F;AACA,MAAI,CAAC,GAAG,SAAS,UAAU;AACzB,QAAI,sGAAiG;AACrG,eAAW;AAAA,EACb;AAEA,MAAI,EAAE;AACN,QAAM,WAAW,MAAM,cAAc;AACrC,QAAM,aAAa,oBAAoB,OAAO;AAC9C,MAAI,sCAAsC;AAC1C,MAAI,SAAS,WAAW;AACtB,UAAM,SAAS,SAAS,aAAa,SAAY,wBAAmB,SAAS,QAAQ,OAAO;AAC5F,QAAI,oCAAoC,SAAS,OAAO,IAAI,MAAM,EAAE;AAAA,EACtE,OAAO;AACL,QAAI,sCAAsC,SAAS,SAAS,KAAK,SAAS,MAAM,MAAM,EAAE,EAAE;AAAA,EAC5F;AACA,MAAI,WAAW,WAAW;AACxB,QAAI,+DAA+D;AAAA,EACrE,OAAO;AACL,QAAI,sCAAsC,WAAW,SAAS,KAAK,WAAW,MAAM,MAAM,EAAE,EAAE;AAAA,EAChG;AAEA,MAAI,EAAE;AACN,MAAI,mDAAmD;AACvD,QAAM,MAAM,UAAU;AACtB,QAAM,SAAS,eAAe,QAAQ,GAAG;AACzC,aAAW,KAAK,aAAa,GAAG;AAC9B,UAAM,SAAS,eAAe,CAAC;AAC/B,UAAM,MAAM,CAAC,CAAC,QAAQ,IAAI,MAAM;AAChC,UAAM,aAAa,MAAM,SAAS,eAAe;AACjD;AAAA,MACE,KAAK,EAAE,OAAO,EAAE,CAAC,IAAI,MAAM,eAAe,QAAQ,KAAK,MAAM,GAAG,MAAM,KAAK,QAAQ,IAAI,UAAU;AAAA,IACnG;AACA,QAAI,KAAK;AACP,YAAM,QAAQ,aAAa,GAAG,QAAW,QAAQ,GAAG;AACpD,YAAM,WAAW,oBAAoB,CAAC;AACtC,YAAM,eAAe,UAAU,qBAAqB,CAAC,KAAK,CAAC,QAAQ,IAAI,QAAQ,KAAK,CAAC,QAAQ,IAAI;AACjG,UAAI,kBAAkB,KAAK,GAAG,eAAe,eAAe,EAAE,EAAE;AAAA,IAClE;AAAA,EACF;AACA,MAAI,IAAI,aAAa;AACnB,QAAI,IAAI,YAAY,WAAW,SAAS,KAAK,IAAI,YAAY,WAAW,UAAU,GAAG;AACnF,UAAI,8BAA8B,IAAI,WAAW,GAAG;AAAA,IACtD,OAAO;AACL,UAAI,sCAAsC,IAAI,WAAW,EAAE;AAAA,IAC7D;AAAA,EACF;AACA,MAAI,QAAQ,IAAI,kBAAkB;AAChC,QAAI,uBAAuB,QAAQ,IAAI,gBAAgB,uBAAuB;AAAA,EAChF;AACA,MAAI,kBAAkB,IAAI,eAAe,OAAO;AAChD,MAAI,kBAAkB,IAAI,qBAAqB,cAAc;AAE7D,MAAI,EAAE;AACN,QAAM,QAAQ,kBAAkB,OAAO;AACvC,MAAI,gCAAgC;AACpC,MAAI,CAAC,OAAO;AACV,QAAI,8EAAyE;AAC7E,eAAW;AAAA,EACb,WAAW,MAAM,WAAW,SAAS;AACnC,QAAI,wBAAwB;AAC5B,QAAI,oBAAoB,MAAM,eAAe,SAAS,EAAE;AAAA,EAC1D,OAAO;AACL,QAAI,oBAAoB,MAAM,MAAM,EAAE;AACtC,QAAI,MAAM,aAAa,OAAW,KAAI,oBAAoB,MAAM,QAAQ,MAAM;AAC9E,QAAI,MAAM,cAAe,KAAI,oBAAoB,MAAM,aAAa,EAAE;AACtE,QAAI,MAAM,eAAe,MAAM,WAAW,SAAU,KAAI,oBAAoB,WAAW,MAAM,WAAW,CAAC,EAAE;AAC3G,QAAI,MAAM,WAAW,QAAS,KAAI,oBAAoB,MAAM,UAAU,OAAO,EAAE;AAC/E,QAAI,MAAM,WAAW,aAAa,OAAW,KAAI,oBAAoB,MAAM,UAAU,QAAQ,EAAE;AAC/F,QAAI,MAAM,WAAW,QAAS,KAAI,oBAAoB,MAAM,UAAU,OAAO,EAAE;AAC/E,QAAI,MAAM,WAAW,QAAQ;AAC3B,UAAI,WAAW;AACf,iBAAW,QAAQ,MAAM,UAAU,OAAO,MAAM,IAAI,EAAE,MAAM,GAAG,EAAE,EAAG,KAAI,OAAO,IAAI,EAAE;AAAA,IACvF;AACA,eAAW;AAAA,EACb;AAEA,MAAI,EAAE;AACN,QAAM,WAAW,KAAK,SAAS,cAAc;AAC7C,MAAI,WAAW,QAAQ,GAAG;AACxB,QAAI;AACF,YAAM,OAAO,KAAK,MAAM,aAAa,UAAU,OAAO,CAAC;AACvD,UAAI,KAAK,OAAO,eAAe,KAAK,GAAG,GAAG;AACxC,YAAI,wDAAwD,KAAK,GAAG,UAAU,KAAK,QAAQ,GAAG,GAAG;AAAA,MACnG,OAAO;AACL,YAAI,0GAAqG;AAAA,MAC3G;AAAA,IACF,QAAQ;AACN,UAAI,oGAA+F;AAAA,IACrG;AAAA,EACF,OAAO;AACL,QAAI,wFAAwF;AAAA,EAC9F;AAEA,MAAI,OAAO,WAAW,UAAU;AAC9B,QAAI,EAAE;AACN,QAAI,2BAA2B;AAC/B,QAAI,MAAM,YAAa,KAAI,mCAAmC,WAAW,MAAM,WAAW,CAAC,OAAO;AAClG,QAAI,4DAA4D;AAAA,EAClE;AAEA,QAAM,kBAAkB;AACxB,QAAM,eAAe,OAAO;AAC5B,gBAAc,OAAO;AACrB,uBAAqB;AAErB,MAAI,EAAE;AACN,MAAI,4BAA4B,WAAW,aAAa,IAAI,EAAE;AAC9D,SAAO,WAAW,IAAI;AACxB;AAEA,eAAe,oBAAmC;AAChD,MAAI,EAAE;AACN,MAAI,gCAAgC;AACpC,MAAI;AACF,UAAM,WAAW,MAAM,iBAAiB;AACxC,QAAI,qCAAqC,SAAS,OAAO,SAAS,SAAS,GAAG,GAAG;AAAA,EACnF,SAAS,KAAK;AACZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,+BAA+B,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AAAA,EACxD;AACF;AAEA,eAAe,eAAe,SAAgC;AAC5D,MAAI,EAAE;AACN,MAAI,gCAAgC;AACpC,MAAI,SAAS;AACb,MAAI;AACF,UAAM,KAAK,aAAa,KAAK,SAAS,WAAW,CAAC;AAClD,aAAS;AACT,QAAI;AACF,YAAM,MAAM,GAAG,QAAQ,2BAA2B,EAAE,IAAI;AACxD,YAAM,IAAI,KAAK,KAAK;AACpB,UAAI,wCAAwC,CAAC,GAAG;AAAA,IAClD,QAAQ;AACN,UAAI,gEAAgE;AAAA,IACtE;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,mCAAmC,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AAAA,EAC5D,UAAE;AACA,QAAI,QAAQ;AACV,UAAI;AAAE,sBAAc;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IAChD;AAAA,EACF;AACF;AAEA,SAAS,cAAc,SAAuB;AAC5C,MAAI,EAAE;AACN,MAAI,4BAA4B;AAChC,MAAI;AACF,UAAM,EAAE,MAAM,IAAI,eAAe,EAAE,QAAQ,CAAC;AAC5C,QAAI,MAAM,WAAW,GAAG;AACtB,UAAI,mEAAmE;AACvE;AAAA,IACF;AAEA,QAAI,KAA6C;AACjD,QAAI;AACF,WAAK,aAAa,KAAK,SAAS,WAAW,CAAC;AAAA,IAC9C,SAAS,KAAK;AACZ,YAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,UAAI,oBAAoB,MAAM,MAAM,+BAA+B,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AACtF;AAAA,IACF;AAEA,QAAI,OAA6C;AACjD,QAAI;AACF,aAAO,GAAG;AAAA,QACR;AAAA,MACF;AAAA,IACF,QAAQ;AAAA,IAER;AAEA,QAAI;AACF,YAAM,MAAM,KAAK,IAAI;AACrB,iBAAW,QAAQ,OAAO;AACxB,YAAI;AACJ,YAAI,CAAC,MAAM;AACT,iBAAO,KAAK,KAAK,GAAG;AAAA,QACtB,OAAO;AACL,cAAI;AACF,kBAAM,MAAM,KAAK,IAAI,KAAK,GAAG;AAC7B,kBAAM,IAAI,KAAK,KAAK;AACpB,kBAAM,SAAS,KAAK,WAAW;AAC/B,gBAAI,CAAC,QAAQ;AACX,qBAAO,KAAK,KAAK,GAAG,KAAK,CAAC;AAAA,YAC5B,OAAO;AACL,oBAAM,QAAQ,MAAM,IAAI,KAAK,MAAM,EAAE,QAAQ;AAC7C,oBAAM,QAAQ,QAAQ;AACtB,oBAAM,QAAQ,SAAS,KAAK,UAAU;AACtC,oBAAM,WAAW,QAAQ,IACrB,GAAG,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,GAAM,CAAC,CAAC,UAC1C,GAAG,KAAK,MAAM,KAAK,CAAC;AACxB,oBAAM,MAAM,OAAO,MAAM,GAAG,EAAE;AAC9B,qBAAO,KAAK,KAAK,GAAG,KAAK,CAAC,wBAAwB,GAAG,KAAK,QAAQ,MAAM,KAAK;AAAA,YAC/E;AAAA,UACF,SAAS,KAAK;AACZ,kBAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,mBAAO,KAAK,KAAK,GAAG,oBAAoB,IAAI,MAAM,GAAG,EAAE,CAAC;AAAA,UAC1D;AAAA,QACF;AACA,YAAI,IAAI;AAAA,MACV;AAAA,IACF,UAAE;AACA,UAAI;AAAE,sBAAc;AAAA,MAAG,QAAQ;AAAA,MAAe;AAAA,IAChD;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,oBAAoB,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG;AAAA,EAC7C;AACF;AAEA,SAAS,uBAA6B;AACpC,MAAI,EAAE;AACN,QAAM,QAAQ,mBAAmB,IAAI,YAAY;AACjD,MAAI,qCAAqC,KAAK,iCAAiC;AACjF;","names":[]}
@@ -0,0 +1,2 @@
1
+ export declare function shutdownCli(): Promise<void>;
2
+ //# sourceMappingURL=shutdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"shutdown.d.ts","sourceRoot":"","sources":["../../src/cli/shutdown.ts"],"names":[],"mappings":"AAcA,wBAAsB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC,CAgBjD"}
@@ -0,0 +1,26 @@
1
+ import { closeDatabase } from "../cache/db.js";
2
+ import { resetEmbeddingService } from "../embedding/embed.js";
3
+ import { disposeRerankProvider } from "../providers/rerank-provider.js";
4
+ import { createLogger } from "../logger.js";
5
+ const log = createLogger("cli");
6
+ async function shutdownCli() {
7
+ try {
8
+ await disposeRerankProvider();
9
+ } catch (err) {
10
+ log.debug("rerank dispose failed", { error: err instanceof Error ? err.message : String(err) });
11
+ }
12
+ try {
13
+ resetEmbeddingService();
14
+ } catch (err) {
15
+ log.debug("embedding reset failed", { error: err instanceof Error ? err.message : String(err) });
16
+ }
17
+ try {
18
+ closeDatabase();
19
+ } catch (err) {
20
+ log.debug("database close failed", { error: err instanceof Error ? err.message : String(err) });
21
+ }
22
+ }
23
+ export {
24
+ shutdownCli
25
+ };
26
+ //# sourceMappingURL=shutdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/cli/shutdown.ts"],"sourcesContent":["import { closeDatabase } from '../cache/db.js';\nimport { resetEmbeddingService } from '../embedding/embed.js';\nimport { disposeRerankProvider } from '../providers/rerank-provider.js';\nimport { createLogger } from '../logger.js';\n\nconst log = createLogger('cli');\n\n// Release native resources (ONNX sessions, sqlite-vec, embedding subprocess)\n// before the process exits. Without explicit teardown, libc++ destructors\n// race during shutdown and surface as `mutex lock failed: Invalid argument`\n// on macOS — the cosmetic-but-loud SIGABRT noted in v0.1.1 bench.\n//\n// Best-effort: every step swallows its own errors so a partial failure\n// doesn't block subsequent cleanup steps.\nexport async function shutdownCli(): Promise<void> {\n try {\n await disposeRerankProvider();\n } catch (err) {\n log.debug('rerank dispose failed', { error: err instanceof Error ? err.message : String(err) });\n }\n try {\n resetEmbeddingService();\n } catch (err) {\n log.debug('embedding reset failed', { error: err instanceof Error ? err.message : String(err) });\n }\n try {\n closeDatabase();\n } catch (err) {\n log.debug('database close failed', { error: err instanceof Error ? err.message : String(err) });\n }\n}\n"],"mappings":"AAAA,SAAS,qBAAqB;AAC9B,SAAS,6BAA6B;AACtC,SAAS,6BAA6B;AACtC,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,KAAK;AAS9B,eAAsB,cAA6B;AACjD,MAAI;AACF,UAAM,sBAAsB;AAAA,EAC9B,SAAS,KAAK;AACZ,QAAI,MAAM,yBAAyB,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AAAA,EAChG;AACA,MAAI;AACF,0BAAsB;AAAA,EACxB,SAAS,KAAK;AACZ,QAAI,MAAM,0BAA0B,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AAAA,EACjG;AACA,MAAI;AACF,kBAAc;AAAA,EAChB,SAAS,KAAK;AACZ,QAAI,MAAM,yBAAyB,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AAAA,EAChG;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"local-llm.d.ts","sourceRoot":"","sources":["../../../src/extraction/v1/local-llm.ts"],"names":[],"mappings":"AAOA,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,CA4DzC"}
1
+ {"version":3,"file":"local-llm.d.ts","sourceRoot":"","sources":["../../../src/extraction/v1/local-llm.ts"],"names":[],"mappings":"AAQA,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAsB,mBAAmB,CACvC,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,CAwBzC"}
@@ -1,55 +1,31 @@
1
1
  import { createLogger } from "../../logger.js";
2
+ import { isLlmConfigured, runLlmJson } from "../../integrations/cloud/llm/run.js";
2
3
  const log = createLogger("extract");
3
4
  const MAX_HTML_CHARS = 5e4;
4
5
  const REQUEST_TIMEOUT_MS = 3e4;
5
6
  function isLocalLlmEnabled() {
6
- return !!process.env["WIGOLO_LLM_PROVIDER"];
7
+ return isLlmConfigured();
7
8
  }
8
9
  async function extractWithLocalLlm(request) {
9
10
  if (!isLocalLlmEnabled()) return null;
10
- const provider = process.env["WIGOLO_LLM_PROVIDER"];
11
- const endpoint = provider.includes("/chat/completions") ? provider : provider.replace(/\/+$/, "") + "/v1/chat/completions";
12
- const model = process.env["WIGOLO_LLM_MODEL"] ?? "local";
13
11
  const htmlSlice = request.html.length > MAX_HTML_CHARS ? request.html.slice(0, MAX_HTML_CHARS) : request.html;
14
- const prompt = `Extract data matching this JSON schema from the HTML. Return only valid JSON.
15
- Schema: ${JSON.stringify(request.schema)}
12
+ const prompt = `Extract data matching the JSON schema from the HTML below. Return only the JSON object \u2014 no prose, no markdown fences.
13
+
16
14
  URL: ${request.url}
17
- HTML: ${htmlSlice}`;
18
- const body = {
19
- model,
20
- messages: [{ role: "user", content: prompt }],
21
- response_format: { type: "json_object" }
22
- };
23
- let response;
15
+
16
+ HTML:
17
+ ${htmlSlice}`;
24
18
  try {
25
- response = await fetch(endpoint, {
26
- method: "POST",
27
- headers: { "content-type": "application/json" },
28
- body: JSON.stringify(body),
29
- signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
19
+ const r = await runLlmJson({
20
+ prompt,
21
+ jsonSchema: request.schema,
22
+ timeoutMs: REQUEST_TIMEOUT_MS
30
23
  });
24
+ return r.values;
31
25
  } catch (err) {
32
- log.error("local llm request failed", { error: String(err) });
26
+ log.error("local llm request failed", { error: err instanceof Error ? err.message : String(err) });
33
27
  throw err;
34
28
  }
35
- if (!response.ok) {
36
- throw new Error(`Local LLM endpoint returned ${response.status}`);
37
- }
38
- const payload = await response.json();
39
- const content = payload.choices?.[0]?.message?.content;
40
- if (typeof content !== "string" || content.trim().length === 0) {
41
- throw new Error("Local LLM response missing message content");
42
- }
43
- let parsed;
44
- try {
45
- parsed = JSON.parse(content);
46
- } catch (err) {
47
- throw new Error(`Local LLM returned invalid JSON: ${String(err)}`);
48
- }
49
- if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
50
- throw new Error("Local LLM response is not a JSON object");
51
- }
52
- return parsed;
53
29
  }
54
30
  export {
55
31
  extractWithLocalLlm,
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../src/extraction/v1/local-llm.ts"],"sourcesContent":["import { createLogger } from '../../logger.js';\n\nconst log = createLogger('extract');\n\nconst MAX_HTML_CHARS = 50000;\nconst REQUEST_TIMEOUT_MS = 30_000;\n\nexport function isLocalLlmEnabled(): boolean {\n return !!process.env['WIGOLO_LLM_PROVIDER'];\n}\n\nexport interface LocalLlmRequest {\n schema: Record<string, unknown>;\n html: string;\n url: string;\n}\n\nexport async function extractWithLocalLlm(\n request: LocalLlmRequest,\n): Promise<Record<string, unknown> | null> {\n if (!isLocalLlmEnabled()) return null;\n\n const provider = process.env['WIGOLO_LLM_PROVIDER']!;\n const endpoint = provider.includes('/chat/completions')\n ? provider\n : provider.replace(/\\/+$/, '') + '/v1/chat/completions';\n const model = process.env['WIGOLO_LLM_MODEL'] ?? 'local';\n\n const htmlSlice = request.html.length > MAX_HTML_CHARS\n ? request.html.slice(0, MAX_HTML_CHARS)\n : request.html;\n\n const prompt =\n 'Extract data matching this JSON schema from the HTML. Return only valid JSON.\\n' +\n `Schema: ${JSON.stringify(request.schema)}\\n` +\n `URL: ${request.url}\\n` +\n `HTML: ${htmlSlice}`;\n\n const body = {\n model,\n messages: [{ role: 'user', content: prompt }],\n response_format: { type: 'json_object' },\n };\n\n let response: Response;\n try {\n response = await fetch(endpoint, {\n method: 'POST',\n headers: { 'content-type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),\n });\n } catch (err) {\n log.error('local llm request failed', { error: String(err) });\n throw err;\n }\n\n if (!response.ok) {\n throw new Error(`Local LLM endpoint returned ${response.status}`);\n }\n\n const payload = (await response.json()) as {\n choices?: Array<{ message?: { content?: string } }>;\n };\n const content = payload.choices?.[0]?.message?.content;\n if (typeof content !== 'string' || content.trim().length === 0) {\n throw new Error('Local LLM response missing message content');\n }\n\n let parsed: unknown;\n try {\n parsed = JSON.parse(content);\n } catch (err) {\n throw new Error(`Local LLM returned invalid JSON: ${String(err)}`);\n }\n if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {\n throw new Error('Local LLM response is not a JSON object');\n }\n return parsed as Record<string, unknown>;\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,SAAS;AAElC,MAAM,iBAAiB;AACvB,MAAM,qBAAqB;AAEpB,SAAS,oBAA6B;AAC3C,SAAO,CAAC,CAAC,QAAQ,IAAI,qBAAqB;AAC5C;AAQA,eAAsB,oBACpB,SACyC;AACzC,MAAI,CAAC,kBAAkB,EAAG,QAAO;AAEjC,QAAM,WAAW,QAAQ,IAAI,qBAAqB;AAClD,QAAM,WAAW,SAAS,SAAS,mBAAmB,IAClD,WACA,SAAS,QAAQ,QAAQ,EAAE,IAAI;AACnC,QAAM,QAAQ,QAAQ,IAAI,kBAAkB,KAAK;AAEjD,QAAM,YAAY,QAAQ,KAAK,SAAS,iBACpC,QAAQ,KAAK,MAAM,GAAG,cAAc,IACpC,QAAQ;AAEZ,QAAM,SACJ;AAAA,UACW,KAAK,UAAU,QAAQ,MAAM,CAAC;AAAA,OACjC,QAAQ,GAAG;AAAA,QACV,SAAS;AAEpB,QAAM,OAAO;AAAA,IACX;AAAA,IACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,IAC5C,iBAAiB,EAAE,MAAM,cAAc;AAAA,EACzC;AAEA,MAAI;AACJ,MAAI;AACF,eAAW,MAAM,MAAM,UAAU;AAAA,MAC/B,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,kBAAkB;AAAA,IAChD,CAAC;AAAA,EACH,SAAS,KAAK;AACZ,QAAI,MAAM,4BAA4B,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAC5D,UAAM;AAAA,EACR;AAEA,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,IAAI,MAAM,+BAA+B,SAAS,MAAM,EAAE;AAAA,EAClE;AAEA,QAAM,UAAW,MAAM,SAAS,KAAK;AAGrC,QAAM,UAAU,QAAQ,UAAU,CAAC,GAAG,SAAS;AAC/C,MAAI,OAAO,YAAY,YAAY,QAAQ,KAAK,EAAE,WAAW,GAAG;AAC9D,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAEA,MAAI;AACJ,MAAI;AACF,aAAS,KAAK,MAAM,OAAO;AAAA,EAC7B,SAAS,KAAK;AACZ,UAAM,IAAI,MAAM,oCAAoC,OAAO,GAAG,CAAC,EAAE;AAAA,EACnE;AACA,MAAI,CAAC,UAAU,OAAO,WAAW,YAAY,MAAM,QAAQ,MAAM,GAAG;AAClE,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC3D;AACA,SAAO;AACT;","names":[]}
1
+ {"version":3,"sources":["../../../src/extraction/v1/local-llm.ts"],"sourcesContent":["import { createLogger } from '../../logger.js';\nimport { isLlmConfigured, runLlmJson } from '../../integrations/cloud/llm/run.js';\n\nconst log = createLogger('extract');\n\nconst MAX_HTML_CHARS = 50000;\nconst REQUEST_TIMEOUT_MS = 30_000;\n\nexport function isLocalLlmEnabled(): boolean {\n return isLlmConfigured();\n}\n\nexport interface LocalLlmRequest {\n schema: Record<string, unknown>;\n html: string;\n url: string;\n}\n\nexport async function extractWithLocalLlm(\n request: LocalLlmRequest,\n): Promise<Record<string, unknown> | null> {\n if (!isLocalLlmEnabled()) return null;\n\n const htmlSlice = request.html.length > MAX_HTML_CHARS\n ? request.html.slice(0, MAX_HTML_CHARS)\n : request.html;\n\n const prompt =\n 'Extract data matching the JSON schema from the HTML below. ' +\n 'Return only the JSON object — no prose, no markdown fences.\\n\\n' +\n `URL: ${request.url}\\n\\n` +\n `HTML:\\n${htmlSlice}`;\n\n try {\n const r = await runLlmJson({\n prompt,\n jsonSchema: request.schema,\n timeoutMs: REQUEST_TIMEOUT_MS,\n });\n return r.values;\n } catch (err) {\n log.error('local llm request failed', { error: err instanceof Error ? err.message : String(err) });\n throw err;\n }\n}\n"],"mappings":"AAAA,SAAS,oBAAoB;AAC7B,SAAS,iBAAiB,kBAAkB;AAE5C,MAAM,MAAM,aAAa,SAAS;AAElC,MAAM,iBAAiB;AACvB,MAAM,qBAAqB;AAEpB,SAAS,oBAA6B;AAC3C,SAAO,gBAAgB;AACzB;AAQA,eAAsB,oBACpB,SACyC;AACzC,MAAI,CAAC,kBAAkB,EAAG,QAAO;AAEjC,QAAM,YAAY,QAAQ,KAAK,SAAS,iBACpC,QAAQ,KAAK,MAAM,GAAG,cAAc,IACpC,QAAQ;AAEZ,QAAM,SACJ;AAAA;AAAA,OAEQ,QAAQ,GAAG;AAAA;AAAA;AAAA,EACT,SAAS;AAErB,MAAI;AACF,UAAM,IAAI,MAAM,WAAW;AAAA,MACzB;AAAA,MACA,YAAY,QAAQ;AAAA,MACpB,WAAW;AAAA,IACb,CAAC;AACD,WAAO,EAAE;AAAA,EACX,SAAS,KAAK;AACZ,QAAI,MAAM,4BAA4B,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AACjG,UAAM;AAAA,EACR;AACF;","names":[]}