@staticn0va/wigolo 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1003)
  1. package/LICENSE +1 -1
  2. package/README.md +146 -227
  3. package/SKILL.md +382 -0
  4. package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
  5. package/assets/blocks/claude-code/wigolo-command.md +40 -0
  6. package/assets/blocks/cursor/wigolo.mdc +46 -0
  7. package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
  8. package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
  9. package/assets/skills/wigolo/SKILL.md +50 -0
  10. package/assets/skills/wigolo/rules/cache-first.md +30 -0
  11. package/assets/skills/wigolo/rules/synthesis.md +43 -0
  12. package/assets/skills/wigolo-agent/SKILL.md +73 -0
  13. package/assets/skills/wigolo-crawl/SKILL.md +60 -0
  14. package/assets/skills/wigolo-extract/SKILL.md +59 -0
  15. package/assets/skills/wigolo-fetch/SKILL.md +65 -0
  16. package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
  17. package/assets/skills/wigolo-research/SKILL.md +77 -0
  18. package/assets/skills/wigolo-search/SKILL.md +78 -0
  19. package/dist/agent/executor.d.ts +33 -0
  20. package/dist/agent/executor.d.ts.map +1 -0
  21. package/dist/agent/executor.js +233 -0
  22. package/dist/agent/executor.js.map +1 -0
  23. package/dist/agent/pipeline.d.ts +5 -0
  24. package/dist/agent/pipeline.d.ts.map +1 -0
  25. package/dist/agent/pipeline.js +238 -0
  26. package/dist/agent/pipeline.js.map +1 -0
  27. package/dist/agent/planner.d.ts +13 -0
  28. package/dist/agent/planner.d.ts.map +1 -0
  29. package/dist/agent/planner.js +271 -0
  30. package/dist/agent/planner.js.map +1 -0
  31. package/dist/agent/relevance.d.ts +15 -0
  32. package/dist/agent/relevance.d.ts.map +1 -0
  33. package/dist/agent/relevance.js +60 -0
  34. package/dist/agent/relevance.js.map +1 -0
  35. package/dist/cache/backfill-embeddings.d.ts +23 -0
  36. package/dist/cache/backfill-embeddings.d.ts.map +1 -0
  37. package/dist/cache/backfill-embeddings.js +105 -0
  38. package/dist/cache/backfill-embeddings.js.map +1 -0
  39. package/dist/cache/change-detector.d.ts +7 -0
  40. package/dist/cache/change-detector.d.ts.map +1 -0
  41. package/dist/cache/change-detector.js +43 -0
  42. package/dist/cache/change-detector.js.map +1 -0
  43. package/dist/cache/db.d.ts +1 -0
  44. package/dist/cache/db.d.ts.map +1 -1
  45. package/dist/cache/db.js +94 -22
  46. package/dist/cache/db.js.map +1 -1
  47. package/dist/cache/diff-summary.d.ts +2 -0
  48. package/dist/cache/diff-summary.d.ts.map +1 -0
  49. package/dist/cache/diff-summary.js +82 -0
  50. package/dist/cache/diff-summary.js.map +1 -0
  51. package/dist/cache/migrations/runner.d.ts +29 -0
  52. package/dist/cache/migrations/runner.d.ts.map +1 -0
  53. package/dist/cache/migrations/runner.js +147 -0
  54. package/dist/cache/migrations/runner.js.map +1 -0
  55. package/dist/cache/sqlite-vec-store.d.ts +42 -0
  56. package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
  57. package/dist/cache/sqlite-vec-store.js +176 -0
  58. package/dist/cache/sqlite-vec-store.js.map +1 -0
  59. package/dist/cache/store.d.ts +47 -1
  60. package/dist/cache/store.d.ts.map +1 -1
  61. package/dist/cache/store.js +364 -168
  62. package/dist/cache/store.js.map +1 -1
  63. package/dist/cli/agents/antigravity.d.ts +20 -0
  64. package/dist/cli/agents/antigravity.d.ts.map +1 -0
  65. package/dist/cli/agents/antigravity.js +49 -0
  66. package/dist/cli/agents/antigravity.js.map +1 -0
  67. package/dist/cli/agents/claude-code.d.ts +25 -0
  68. package/dist/cli/agents/claude-code.d.ts.map +1 -0
  69. package/dist/cli/agents/claude-code.js +111 -0
  70. package/dist/cli/agents/claude-code.js.map +1 -0
  71. package/dist/cli/agents/cursor.d.ts +21 -0
  72. package/dist/cli/agents/cursor.d.ts.map +1 -0
  73. package/dist/cli/agents/cursor.js +58 -0
  74. package/dist/cli/agents/cursor.js.map +1 -0
  75. package/dist/cli/agents/gemini-cli.d.ts +21 -0
  76. package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
  77. package/dist/cli/agents/gemini-cli.js +55 -0
  78. package/dist/cli/agents/gemini-cli.js.map +1 -0
  79. package/dist/cli/agents/registry.d.ts +21 -0
  80. package/dist/cli/agents/registry.d.ts.map +1 -0
  81. package/dist/cli/agents/registry.js +27 -0
  82. package/dist/cli/agents/registry.js.map +1 -0
  83. package/dist/cli/agents/utils.d.ts +26 -0
  84. package/dist/cli/agents/utils.d.ts.map +1 -0
  85. package/dist/cli/agents/utils.js +136 -0
  86. package/dist/cli/agents/utils.js.map +1 -0
  87. package/dist/cli/agents/vscode.d.ts +21 -0
  88. package/dist/cli/agents/vscode.d.ts.map +1 -0
  89. package/dist/cli/agents/vscode.js +62 -0
  90. package/dist/cli/agents/vscode.js.map +1 -0
  91. package/dist/cli/auth.d.ts +2 -0
  92. package/dist/cli/auth.d.ts.map +1 -0
  93. package/dist/cli/auth.js +94 -0
  94. package/dist/cli/auth.js.map +1 -0
  95. package/dist/cli/backfill.d.ts +2 -0
  96. package/dist/cli/backfill.d.ts.map +1 -0
  97. package/dist/cli/backfill.js +58 -0
  98. package/dist/cli/backfill.js.map +1 -0
  99. package/dist/cli/daemon.d.ts +6 -1
  100. package/dist/cli/daemon.d.ts.map +1 -1
  101. package/dist/cli/daemon.js +61 -3
  102. package/dist/cli/daemon.js.map +1 -1
  103. package/dist/cli/doctor.d.ts +8 -0
  104. package/dist/cli/doctor.d.ts.map +1 -0
  105. package/dist/cli/doctor.js +344 -0
  106. package/dist/cli/doctor.js.map +1 -0
  107. package/dist/cli/health.d.ts +1 -1
  108. package/dist/cli/health.d.ts.map +1 -1
  109. package/dist/cli/health.js +42 -3
  110. package/dist/cli/health.js.map +1 -1
  111. package/dist/cli/help.d.ts +6 -0
  112. package/dist/cli/help.d.ts.map +1 -0
  113. package/dist/cli/help.js +63 -0
  114. package/dist/cli/help.js.map +1 -0
  115. package/dist/cli/index.d.ts +1 -1
  116. package/dist/cli/index.d.ts.map +1 -1
  117. package/dist/cli/index.js +35 -7
  118. package/dist/cli/index.js.map +1 -1
  119. package/dist/cli/init.d.ts +2 -0
  120. package/dist/cli/init.d.ts.map +1 -0
  121. package/dist/cli/init.js +201 -0
  122. package/dist/cli/init.js.map +1 -0
  123. package/dist/cli/plugin.d.ts +5 -0
  124. package/dist/cli/plugin.d.ts.map +1 -0
  125. package/dist/cli/plugin.js +185 -0
  126. package/dist/cli/plugin.js.map +1 -0
  127. package/dist/cli/setup-mcp.d.ts +2 -0
  128. package/dist/cli/setup-mcp.d.ts.map +1 -0
  129. package/dist/cli/setup-mcp.js +114 -0
  130. package/dist/cli/setup-mcp.js.map +1 -0
  131. package/dist/cli/shell.d.ts +2 -0
  132. package/dist/cli/shell.d.ts.map +1 -0
  133. package/dist/cli/shell.js +86 -0
  134. package/dist/cli/shell.js.map +1 -0
  135. package/dist/cli/shutdown.d.ts +2 -0
  136. package/dist/cli/shutdown.d.ts.map +1 -0
  137. package/dist/cli/shutdown.js +26 -0
  138. package/dist/cli/shutdown.js.map +1 -0
  139. package/dist/cli/status.d.ts +2 -0
  140. package/dist/cli/status.d.ts.map +1 -0
  141. package/dist/cli/status.js +31 -0
  142. package/dist/cli/status.js.map +1 -0
  143. package/dist/cli/telemetry.d.ts +10 -0
  144. package/dist/cli/telemetry.d.ts.map +1 -0
  145. package/dist/cli/telemetry.js +56 -0
  146. package/dist/cli/telemetry.js.map +1 -0
  147. package/dist/cli/tui/agents-types.d.ts +28 -0
  148. package/dist/cli/tui/agents-types.d.ts.map +1 -0
  149. package/dist/cli/tui/agents-types.js +1 -0
  150. package/dist/cli/tui/agents-types.js.map +1 -0
  151. package/dist/cli/tui/agents.d.ts +11 -0
  152. package/dist/cli/tui/agents.d.ts.map +1 -0
  153. package/dist/cli/tui/agents.js +93 -0
  154. package/dist/cli/tui/agents.js.map +1 -0
  155. package/dist/cli/tui/banner.d.ts +3 -0
  156. package/dist/cli/tui/banner.d.ts.map +1 -0
  157. package/dist/cli/tui/banner.js +30 -0
  158. package/dist/cli/tui/banner.js.map +1 -0
  159. package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
  160. package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
  161. package/dist/cli/tui/components/AgentSelect.js +116 -0
  162. package/dist/cli/tui/components/AgentSelect.js.map +1 -0
  163. package/dist/cli/tui/components/Banner.d.ts +6 -0
  164. package/dist/cli/tui/components/Banner.d.ts.map +1 -0
  165. package/dist/cli/tui/components/Banner.js +25 -0
  166. package/dist/cli/tui/components/Banner.js.map +1 -0
  167. package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
  168. package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
  169. package/dist/cli/tui/components/BrowserSelect.js +19 -0
  170. package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
  171. package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
  172. package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
  173. package/dist/cli/tui/components/InstallProgress.js +67 -0
  174. package/dist/cli/tui/components/InstallProgress.js.map +1 -0
  175. package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
  176. package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
  177. package/dist/cli/tui/components/SkillInstall.js +94 -0
  178. package/dist/cli/tui/components/SkillInstall.js.map +1 -0
  179. package/dist/cli/tui/components/Summary.d.ts +22 -0
  180. package/dist/cli/tui/components/Summary.d.ts.map +1 -0
  181. package/dist/cli/tui/components/Summary.js +135 -0
  182. package/dist/cli/tui/components/Summary.js.map +1 -0
  183. package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
  184. package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
  185. package/dist/cli/tui/components/SystemCheck.js +71 -0
  186. package/dist/cli/tui/components/SystemCheck.js.map +1 -0
  187. package/dist/cli/tui/components/Verification.d.ts +8 -0
  188. package/dist/cli/tui/components/Verification.d.ts.map +1 -0
  189. package/dist/cli/tui/components/Verification.js +63 -0
  190. package/dist/cli/tui/components/Verification.js.map +1 -0
  191. package/dist/cli/tui/config-writer-cli.d.ts +12 -0
  192. package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
  193. package/dist/cli/tui/config-writer-cli.js +39 -0
  194. package/dist/cli/tui/config-writer-cli.js.map +1 -0
  195. package/dist/cli/tui/config-writer-json.d.ts +16 -0
  196. package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
  197. package/dist/cli/tui/config-writer-json.js +86 -0
  198. package/dist/cli/tui/config-writer-json.js.map +1 -0
  199. package/dist/cli/tui/config-writer-toml.d.ts +16 -0
  200. package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
  201. package/dist/cli/tui/config-writer-toml.js +83 -0
  202. package/dist/cli/tui/config-writer-toml.js.map +1 -0
  203. package/dist/cli/tui/config-writer.d.ts +25 -0
  204. package/dist/cli/tui/config-writer.d.ts.map +1 -0
  205. package/dist/cli/tui/config-writer.js +101 -0
  206. package/dist/cli/tui/config-writer.js.map +1 -0
  207. package/dist/cli/tui/detect-helpers.d.ts +6 -0
  208. package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
  209. package/dist/cli/tui/detect-helpers.js +45 -0
  210. package/dist/cli/tui/detect-helpers.js.map +1 -0
  211. package/dist/cli/tui/extras-prompt.d.ts +7 -0
  212. package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
  213. package/dist/cli/tui/extras-prompt.js +42 -0
  214. package/dist/cli/tui/extras-prompt.js.map +1 -0
  215. package/dist/cli/tui/flags-types.d.ts +19 -0
  216. package/dist/cli/tui/flags-types.d.ts.map +1 -0
  217. package/dist/cli/tui/flags-types.js +23 -0
  218. package/dist/cli/tui/flags-types.js.map +1 -0
  219. package/dist/cli/tui/flags.d.ts +5 -0
  220. package/dist/cli/tui/flags.d.ts.map +1 -0
  221. package/dist/cli/tui/flags.js +132 -0
  222. package/dist/cli/tui/flags.js.map +1 -0
  223. package/dist/cli/tui/format.d.ts +14 -0
  224. package/dist/cli/tui/format.d.ts.map +1 -0
  225. package/dist/cli/tui/format.js +37 -0
  226. package/dist/cli/tui/format.js.map +1 -0
  227. package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
  228. package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
  229. package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
  230. package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
  231. package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
  232. package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
  233. package/dist/cli/tui/hooks/useInstall.js +90 -0
  234. package/dist/cli/tui/hooks/useInstall.js.map +1 -0
  235. package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
  236. package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
  237. package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
  238. package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
  239. package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
  240. package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
  241. package/dist/cli/tui/hooks/useVerify.js +71 -0
  242. package/dist/cli/tui/hooks/useVerify.js.map +1 -0
  243. package/dist/cli/tui/ink-init.d.ts +2 -0
  244. package/dist/cli/tui/ink-init.d.ts.map +1 -0
  245. package/dist/cli/tui/ink-init.js +198 -0
  246. package/dist/cli/tui/ink-init.js.map +1 -0
  247. package/dist/cli/tui/reporter-auto.d.ts +7 -0
  248. package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
  249. package/dist/cli/tui/reporter-auto.js +15 -0
  250. package/dist/cli/tui/reporter-auto.js.map +1 -0
  251. package/dist/cli/tui/reporter.d.ts +26 -0
  252. package/dist/cli/tui/reporter.d.ts.map +1 -0
  253. package/dist/cli/tui/reporter.js +32 -0
  254. package/dist/cli/tui/reporter.js.map +1 -0
  255. package/dist/cli/tui/run-command.d.ts +14 -0
  256. package/dist/cli/tui/run-command.d.ts.map +1 -0
  257. package/dist/cli/tui/run-command.js +72 -0
  258. package/dist/cli/tui/run-command.js.map +1 -0
  259. package/dist/cli/tui/select-agents.d.ts +6 -0
  260. package/dist/cli/tui/select-agents.d.ts.map +1 -0
  261. package/dist/cli/tui/select-agents.js +32 -0
  262. package/dist/cli/tui/select-agents.js.map +1 -0
  263. package/dist/cli/tui/status-agents.d.ts +11 -0
  264. package/dist/cli/tui/status-agents.d.ts.map +1 -0
  265. package/dist/cli/tui/status-agents.js +53 -0
  266. package/dist/cli/tui/status-agents.js.map +1 -0
  267. package/dist/cli/tui/status-cache.d.ts +6 -0
  268. package/dist/cli/tui/status-cache.d.ts.map +1 -0
  269. package/dist/cli/tui/status-cache.js +39 -0
  270. package/dist/cli/tui/status-cache.js.map +1 -0
  271. package/dist/cli/tui/status-format.d.ts +14 -0
  272. package/dist/cli/tui/status-format.d.ts.map +1 -0
  273. package/dist/cli/tui/status-format.js +41 -0
  274. package/dist/cli/tui/status-format.js.map +1 -0
  275. package/dist/cli/tui/status-python.d.ts +6 -0
  276. package/dist/cli/tui/status-python.d.ts.map +1 -0
  277. package/dist/cli/tui/status-python.js +30 -0
  278. package/dist/cli/tui/status-python.js.map +1 -0
  279. package/dist/cli/tui/system-check.d.ts +24 -0
  280. package/dist/cli/tui/system-check.d.ts.map +1 -0
  281. package/dist/cli/tui/system-check.js +103 -0
  282. package/dist/cli/tui/system-check.js.map +1 -0
  283. package/dist/cli/tui/tui-reporter.d.ts +19 -0
  284. package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
  285. package/dist/cli/tui/tui-reporter.js +95 -0
  286. package/dist/cli/tui/tui-reporter.js.map +1 -0
  287. package/dist/cli/tui/utils/config-writer.d.ts +3 -0
  288. package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
  289. package/dist/cli/tui/utils/config-writer.js +22 -0
  290. package/dist/cli/tui/utils/config-writer.js.map +1 -0
  291. package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
  292. package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
  293. package/dist/cli/tui/utils/suppress-logs.js +11 -0
  294. package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
  295. package/dist/cli/tui/verify-suggestions.d.ts +5 -0
  296. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
  297. package/dist/cli/tui/verify-suggestions.js +20 -0
  298. package/dist/cli/tui/verify-suggestions.js.map +1 -0
  299. package/dist/cli/tui/verify.d.ts +14 -0
  300. package/dist/cli/tui/verify.d.ts.map +1 -0
  301. package/dist/cli/tui/verify.js +101 -0
  302. package/dist/cli/tui/verify.js.map +1 -0
  303. package/dist/cli/tui/version.d.ts +2 -0
  304. package/dist/cli/tui/version.d.ts.map +1 -0
  305. package/dist/cli/tui/version.js +14 -0
  306. package/dist/cli/tui/version.js.map +1 -0
  307. package/dist/cli/uninstall.d.ts +2 -0
  308. package/dist/cli/uninstall.d.ts.map +1 -0
  309. package/dist/cli/uninstall.js +57 -0
  310. package/dist/cli/uninstall.js.map +1 -0
  311. package/dist/cli/warmup.d.ts +10 -2
  312. package/dist/cli/warmup.d.ts.map +1 -1
  313. package/dist/cli/warmup.js +226 -93
  314. package/dist/cli/warmup.js.map +1 -1
  315. package/dist/config.d.ts +28 -2
  316. package/dist/config.d.ts.map +1 -1
  317. package/dist/config.js +106 -56
  318. package/dist/config.js.map +1 -1
  319. package/dist/crawl/crawler.d.ts +6 -0
  320. package/dist/crawl/crawler.d.ts.map +1 -1
  321. package/dist/crawl/crawler.js +210 -209
  322. package/dist/crawl/crawler.js.map +1 -1
  323. package/dist/crawl/dedup.d.ts +1 -0
  324. package/dist/crawl/dedup.d.ts.map +1 -1
  325. package/dist/crawl/dedup.js +124 -81
  326. package/dist/crawl/dedup.js.map +1 -1
  327. package/dist/crawl/etag-incremental.d.ts +43 -0
  328. package/dist/crawl/etag-incremental.d.ts.map +1 -0
  329. package/dist/crawl/etag-incremental.js +94 -0
  330. package/dist/crawl/etag-incremental.js.map +1 -0
  331. package/dist/crawl/index-to-vec.d.ts +10 -0
  332. package/dist/crawl/index-to-vec.d.ts.map +1 -0
  333. package/dist/crawl/index-to-vec.js +44 -0
  334. package/dist/crawl/index-to-vec.js.map +1 -0
  335. package/dist/crawl/mapper.js +136 -164
  336. package/dist/crawl/mapper.js.map +1 -1
  337. package/dist/crawl/rate-limiter.js +63 -66
  338. package/dist/crawl/rate-limiter.js.map +1 -1
  339. package/dist/crawl/robots.js +58 -57
  340. package/dist/crawl/robots.js.map +1 -1
  341. package/dist/crawl/sitemap-first.d.ts +12 -0
  342. package/dist/crawl/sitemap-first.d.ts.map +1 -0
  343. package/dist/crawl/sitemap-first.js +47 -0
  344. package/dist/crawl/sitemap-first.js.map +1 -0
  345. package/dist/crawl/sitemap.js +33 -32
  346. package/dist/crawl/sitemap.js.map +1 -1
  347. package/dist/crawl/url-utils.d.ts +1 -0
  348. package/dist/crawl/url-utils.d.ts.map +1 -1
  349. package/dist/crawl/url-utils.js +49 -37
  350. package/dist/crawl/url-utils.js.map +1 -1
  351. package/dist/daemon/health-check.d.ts +16 -0
  352. package/dist/daemon/health-check.d.ts.map +1 -0
  353. package/dist/daemon/health-check.js +33 -0
  354. package/dist/daemon/health-check.js.map +1 -0
  355. package/dist/daemon/http-server.d.ts +26 -0
  356. package/dist/daemon/http-server.d.ts.map +1 -0
  357. package/dist/daemon/http-server.js +275 -0
  358. package/dist/daemon/http-server.js.map +1 -0
  359. package/dist/daemon/proxy.d.ts +10 -0
  360. package/dist/daemon/proxy.d.ts.map +1 -0
  361. package/dist/daemon/proxy.js +93 -0
  362. package/dist/daemon/proxy.js.map +1 -0
  363. package/dist/embedding/embed.d.ts +59 -0
  364. package/dist/embedding/embed.d.ts.map +1 -0
  365. package/dist/embedding/embed.js +233 -0
  366. package/dist/embedding/embed.js.map +1 -0
  367. package/dist/embedding/fastembed-provider.d.ts +19 -0
  368. package/dist/embedding/fastembed-provider.d.ts.map +1 -0
  369. package/dist/embedding/fastembed-provider.js +51 -0
  370. package/dist/embedding/fastembed-provider.js.map +1 -0
  371. package/dist/embedding/key-terms.d.ts +12 -0
  372. package/dist/embedding/key-terms.d.ts.map +1 -0
  373. package/dist/embedding/key-terms.js +234 -0
  374. package/dist/embedding/key-terms.js.map +1 -0
  375. package/dist/extraction/boilerplate.d.ts +15 -0
  376. package/dist/extraction/boilerplate.d.ts.map +1 -0
  377. package/dist/extraction/boilerplate.js +52 -0
  378. package/dist/extraction/boilerplate.js.map +1 -0
  379. package/dist/extraction/defuddle.d.ts.map +1 -1
  380. package/dist/extraction/defuddle.js +27 -23
  381. package/dist/extraction/defuddle.js.map +1 -1
  382. package/dist/extraction/extract.d.ts.map +1 -1
  383. package/dist/extraction/extract.js +76 -76
  384. package/dist/extraction/extract.js.map +1 -1
  385. package/dist/extraction/jsonld.js +50 -54
  386. package/dist/extraction/jsonld.js.map +1 -1
  387. package/dist/extraction/lang-hints.d.ts +2 -0
  388. package/dist/extraction/lang-hints.d.ts.map +1 -0
  389. package/dist/extraction/lang-hints.js +30 -0
  390. package/dist/extraction/lang-hints.js.map +1 -0
  391. package/dist/extraction/llm-fallback.d.ts +17 -0
  392. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  393. package/dist/extraction/llm-fallback.js +130 -0
  394. package/dist/extraction/llm-fallback.js.map +1 -0
  395. package/dist/extraction/markdown-sanitize.d.ts +2 -0
  396. package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
  397. package/dist/extraction/markdown-sanitize.js +151 -0
  398. package/dist/extraction/markdown-sanitize.js.map +1 -0
  399. package/dist/extraction/markdown.d.ts +11 -0
  400. package/dist/extraction/markdown.d.ts.map +1 -1
  401. package/dist/extraction/markdown.js +195 -91
  402. package/dist/extraction/markdown.js.map +1 -1
  403. package/dist/extraction/pipeline.d.ts +8 -0
  404. package/dist/extraction/pipeline.d.ts.map +1 -1
  405. package/dist/extraction/pipeline.js +57 -91
  406. package/dist/extraction/pipeline.js.map +1 -1
  407. package/dist/extraction/readability.d.ts +1 -1
  408. package/dist/extraction/readability.d.ts.map +1 -1
  409. package/dist/extraction/readability.js +28 -29
  410. package/dist/extraction/readability.js.map +1 -1
  411. package/dist/extraction/schema.d.ts +12 -0
  412. package/dist/extraction/schema.d.ts.map +1 -1
  413. package/dist/extraction/schema.js +135 -72
  414. package/dist/extraction/schema.js.map +1 -1
  415. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  416. package/dist/extraction/site-extractors/docs-generic.js +81 -91
  417. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  418. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  419. package/dist/extraction/site-extractors/github.js +87 -95
  420. package/dist/extraction/site-extractors/github.js.map +1 -1
  421. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  422. package/dist/extraction/site-extractors/mdn.js +46 -54
  423. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  424. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  425. package/dist/extraction/site-extractors/stackoverflow.js +71 -80
  426. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  427. package/dist/extraction/structured-data.d.ts +4 -0
  428. package/dist/extraction/structured-data.d.ts.map +1 -0
  429. package/dist/extraction/structured-data.js +173 -0
  430. package/dist/extraction/structured-data.js.map +1 -0
  431. package/dist/extraction/structured.d.ts +4 -0
  432. package/dist/extraction/structured.d.ts.map +1 -0
  433. package/dist/extraction/structured.js +163 -0
  434. package/dist/extraction/structured.js.map +1 -0
  435. package/dist/extraction/v1/classifier.d.ts +3 -0
  436. package/dist/extraction/v1/classifier.d.ts.map +1 -0
  437. package/dist/extraction/v1/classifier.js +110 -0
  438. package/dist/extraction/v1/classifier.js.map +1 -0
  439. package/dist/extraction/v1/extract-provider.d.ts +16 -0
  440. package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
  441. package/dist/extraction/v1/extract-provider.js +43 -0
  442. package/dist/extraction/v1/extract-provider.js.map +1 -0
  443. package/dist/extraction/v1/local-llm.d.ts +8 -0
  444. package/dist/extraction/v1/local-llm.d.ts.map +1 -0
  445. package/dist/extraction/v1/local-llm.js +34 -0
  446. package/dist/extraction/v1/local-llm.js.map +1 -0
  447. package/dist/extraction/v1/news.d.ts +3 -0
  448. package/dist/extraction/v1/news.d.ts.map +1 -0
  449. package/dist/extraction/v1/news.js +61 -0
  450. package/dist/extraction/v1/news.js.map +1 -0
  451. package/dist/extraction/v1/product.d.ts +3 -0
  452. package/dist/extraction/v1/product.d.ts.map +1 -0
  453. package/dist/extraction/v1/product.js +166 -0
  454. package/dist/extraction/v1/product.js.map +1 -0
  455. package/dist/extraction/v1/recipe.d.ts +3 -0
  456. package/dist/extraction/v1/recipe.d.ts.map +1 -0
  457. package/dist/extraction/v1/recipe.js +136 -0
  458. package/dist/extraction/v1/recipe.js.map +1 -0
  459. package/dist/extraction/v1/routed.d.ts +17 -0
  460. package/dist/extraction/v1/routed.d.ts.map +1 -0
  461. package/dist/extraction/v1/routed.js +68 -0
  462. package/dist/extraction/v1/routed.js.map +1 -0
  463. package/dist/extraction/v1/schemas/Article.d.ts +11 -0
  464. package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
  465. package/dist/extraction/v1/schemas/Article.js +23 -0
  466. package/dist/extraction/v1/schemas/Article.js.map +1 -0
  467. package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
  468. package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
  469. package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
  470. package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
  471. package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
  472. package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
  473. package/dist/extraction/v1/schemas/EventListing.js +122 -0
  474. package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
  475. package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
  476. package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
  477. package/dist/extraction/v1/schemas/Paper.js +156 -0
  478. package/dist/extraction/v1/schemas/Paper.js.map +1 -0
  479. package/dist/extraction/v1/schemas/Product.d.ts +17 -0
  480. package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
  481. package/dist/extraction/v1/schemas/Product.js +149 -0
  482. package/dist/extraction/v1/schemas/Product.js.map +1 -0
  483. package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
  484. package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
  485. package/dist/extraction/v1/schemas/Recipe.js +160 -0
  486. package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
  487. package/dist/extraction/v1/schemas/index.d.ts +13 -0
  488. package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
  489. package/dist/extraction/v1/schemas/index.js +44 -0
  490. package/dist/extraction/v1/schemas/index.js.map +1 -0
  491. package/dist/extraction/v1/site-extractors.d.ts +5 -0
  492. package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
  493. package/dist/extraction/v1/site-extractors.js +31 -0
  494. package/dist/extraction/v1/site-extractors.js.map +1 -0
  495. package/dist/fetch/action-executor.d.ts +28 -0
  496. package/dist/fetch/action-executor.d.ts.map +1 -0
  497. package/dist/fetch/action-executor.js +88 -0
  498. package/dist/fetch/action-executor.js.map +1 -0
  499. package/dist/fetch/auth.d.ts +2 -1
  500. package/dist/fetch/auth.d.ts.map +1 -1
  501. package/dist/fetch/auth.js +56 -26
  502. package/dist/fetch/auth.js.map +1 -1
  503. package/dist/fetch/browser-pool.d.ts +30 -11
  504. package/dist/fetch/browser-pool.d.ts.map +1 -1
  505. package/dist/fetch/browser-pool.js +303 -127
  506. package/dist/fetch/browser-pool.js.map +1 -1
  507. package/dist/fetch/browser-selector.d.ts +17 -0
  508. package/dist/fetch/browser-selector.d.ts.map +1 -0
  509. package/dist/fetch/browser-selector.js +72 -0
  510. package/dist/fetch/browser-selector.js.map +1 -0
  511. package/dist/fetch/browser-types.d.ts +3 -0
  512. package/dist/fetch/browser-types.d.ts.map +1 -0
  513. package/dist/fetch/browser-types.js +45 -0
  514. package/dist/fetch/browser-types.js.map +1 -0
  515. package/dist/fetch/cdp-client.d.ts +9 -0
  516. package/dist/fetch/cdp-client.d.ts.map +1 -0
  517. package/dist/fetch/cdp-client.js +89 -0
  518. package/dist/fetch/cdp-client.js.map +1 -0
  519. package/dist/fetch/content-check.js +39 -46
  520. package/dist/fetch/content-check.js.map +1 -1
  521. package/dist/fetch/error-describe.d.ts +7 -0
  522. package/dist/fetch/error-describe.d.ts.map +1 -0
  523. package/dist/fetch/error-describe.js +37 -0
  524. package/dist/fetch/error-describe.js.map +1 -0
  525. package/dist/fetch/http-client.d.ts +4 -0
  526. package/dist/fetch/http-client.d.ts.map +1 -1
  527. package/dist/fetch/http-client.js +147 -128
  528. package/dist/fetch/http-client.js.map +1 -1
  529. package/dist/fetch/lightpanda.d.ts +28 -0
  530. package/dist/fetch/lightpanda.d.ts.map +1 -0
  531. package/dist/fetch/lightpanda.js +174 -0
  532. package/dist/fetch/lightpanda.js.map +1 -0
  533. package/dist/fetch/playwright-tier.d.ts +19 -0
  534. package/dist/fetch/playwright-tier.d.ts.map +1 -0
  535. package/dist/fetch/playwright-tier.js +76 -0
  536. package/dist/fetch/playwright-tier.js.map +1 -0
  537. package/dist/fetch/router.d.ts +49 -3
  538. package/dist/fetch/router.d.ts.map +1 -1
  539. package/dist/fetch/router.js +187 -81
  540. package/dist/fetch/router.js.map +1 -1
  541. package/dist/index.js +102 -17
  542. package/dist/index.js.map +1 -1
  543. package/dist/instructions.d.ts +31 -0
  544. package/dist/instructions.d.ts.map +1 -0
  545. package/dist/instructions.js +245 -0
  546. package/dist/instructions.js.map +1 -0
  547. package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
  548. package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
  549. package/dist/integrations/cloud/llm/anthropic.js +41 -0
  550. package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
  551. package/dist/integrations/cloud/llm/cache.d.ts +5 -0
  552. package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
  553. package/dist/integrations/cloud/llm/cache.js +49 -0
  554. package/dist/integrations/cloud/llm/cache.js.map +1 -0
  555. package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
  556. package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
  557. package/dist/integrations/cloud/llm/gemini.js +37 -0
  558. package/dist/integrations/cloud/llm/gemini.js.map +1 -0
  559. package/dist/integrations/cloud/llm/groq.d.ts +3 -0
  560. package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
  561. package/dist/integrations/cloud/llm/groq.js +74 -0
  562. package/dist/integrations/cloud/llm/groq.js.map +1 -0
  563. package/dist/integrations/cloud/llm/hash.d.ts +3 -0
  564. package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
  565. package/dist/integrations/cloud/llm/hash.js +26 -0
  566. package/dist/integrations/cloud/llm/hash.js.map +1 -0
  567. package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
  568. package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
  569. package/dist/integrations/cloud/llm/model-select.js +32 -0
  570. package/dist/integrations/cloud/llm/model-select.js.map +1 -0
  571. package/dist/integrations/cloud/llm/openai.d.ts +3 -0
  572. package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
  573. package/dist/integrations/cloud/llm/openai.js +43 -0
  574. package/dist/integrations/cloud/llm/openai.js.map +1 -0
  575. package/dist/integrations/cloud/llm/run.d.ts +27 -0
  576. package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
  577. package/dist/integrations/cloud/llm/run.js +99 -0
  578. package/dist/integrations/cloud/llm/run.js.map +1 -0
  579. package/dist/integrations/cloud/llm/select.d.ts +5 -0
  580. package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
  581. package/dist/integrations/cloud/llm/select.js +30 -0
  582. package/dist/integrations/cloud/llm/select.js.map +1 -0
  583. package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
  584. package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
  585. package/dist/integrations/cloud/llm/text-adapters.js +103 -0
  586. package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
  587. package/dist/integrations/cloud/llm/types.d.ts +24 -0
  588. package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
  589. package/dist/integrations/cloud/llm/types.js +1 -0
  590. package/dist/integrations/cloud/llm/types.js.map +1 -0
  591. package/dist/integrations/cloud/llm/validate.d.ts +6 -0
  592. package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
  593. package/dist/integrations/cloud/llm/validate.js +63 -0
  594. package/dist/integrations/cloud/llm/validate.js.map +1 -0
  595. package/dist/logger.d.ts +4 -1
  596. package/dist/logger.d.ts.map +1 -1
  597. package/dist/logger.js +71 -30
  598. package/dist/logger.js.map +1 -1
  599. package/dist/pdf-parse.d.js +1 -0
  600. package/dist/pdf-parse.d.js.map +1 -0
  601. package/dist/plugins/loader.d.ts +20 -0
  602. package/dist/plugins/loader.d.ts.map +1 -0
  603. package/dist/plugins/loader.js +157 -0
  604. package/dist/plugins/loader.js.map +1 -0
  605. package/dist/plugins/registry.d.ts +26 -0
  606. package/dist/plugins/registry.d.ts.map +1 -0
  607. package/dist/plugins/registry.js +71 -0
  608. package/dist/plugins/registry.js.map +1 -0
  609. package/dist/plugins/validate.d.ts +9 -0
  610. package/dist/plugins/validate.d.ts.map +1 -0
  611. package/dist/plugins/validate.js +79 -0
  612. package/dist/plugins/validate.js.map +1 -0
  613. package/dist/providers/embed-provider.d.ts +11 -0
  614. package/dist/providers/embed-provider.d.ts.map +1 -0
  615. package/dist/providers/embed-provider.js +24 -0
  616. package/dist/providers/embed-provider.js.map +1 -0
  617. package/dist/providers/extract-provider.d.ts +23 -0
  618. package/dist/providers/extract-provider.d.ts.map +1 -0
  619. package/dist/providers/extract-provider.js +25 -0
  620. package/dist/providers/extract-provider.js.map +1 -0
  621. package/dist/providers/rerank-provider.d.ts +17 -0
  622. package/dist/providers/rerank-provider.d.ts.map +1 -0
  623. package/dist/providers/rerank-provider.js +41 -0
  624. package/dist/providers/rerank-provider.js.map +1 -0
  625. package/dist/providers/search-provider.d.ts +25 -0
  626. package/dist/providers/search-provider.d.ts.map +1 -0
  627. package/dist/providers/search-provider.js +44 -0
  628. package/dist/providers/search-provider.js.map +1 -0
  629. package/dist/providers/vector-store.d.ts +27 -0
  630. package/dist/providers/vector-store.d.ts.map +1 -0
  631. package/dist/providers/vector-store.js +27 -0
  632. package/dist/providers/vector-store.js.map +1 -0
  633. package/dist/python-env.d.ts +9 -0
  634. package/dist/python-env.d.ts.map +1 -0
  635. package/dist/python-env.js +13 -0
  636. package/dist/python-env.js.map +1 -0
  637. package/dist/repl/commands/agent.d.ts +5 -0
  638. package/dist/repl/commands/agent.d.ts.map +1 -0
  639. package/dist/repl/commands/agent.js +62 -0
  640. package/dist/repl/commands/agent.js.map +1 -0
  641. package/dist/repl/commands/cache.d.ts +4 -0
  642. package/dist/repl/commands/cache.d.ts.map +1 -0
  643. package/dist/repl/commands/cache.js +43 -0
  644. package/dist/repl/commands/cache.js.map +1 -0
  645. package/dist/repl/commands/crawl.d.ts +7 -0
  646. package/dist/repl/commands/crawl.d.ts.map +1 -0
  647. package/dist/repl/commands/crawl.js +44 -0
  648. package/dist/repl/commands/crawl.js.map +1 -0
  649. package/dist/repl/commands/extract.d.ts +5 -0
  650. package/dist/repl/commands/extract.d.ts.map +1 -0
  651. package/dist/repl/commands/extract.js +47 -0
  652. package/dist/repl/commands/extract.js.map +1 -0
  653. package/dist/repl/commands/fetch.d.ts +5 -0
  654. package/dist/repl/commands/fetch.d.ts.map +1 -0
  655. package/dist/repl/commands/fetch.js +67 -0
  656. package/dist/repl/commands/fetch.js.map +1 -0
  657. package/dist/repl/commands/find-similar.d.ts +5 -0
  658. package/dist/repl/commands/find-similar.d.ts.map +1 -0
  659. package/dist/repl/commands/find-similar.js +74 -0
  660. package/dist/repl/commands/find-similar.js.map +1 -0
  661. package/dist/repl/commands/research.d.ts +5 -0
  662. package/dist/repl/commands/research.d.ts.map +1 -0
  663. package/dist/repl/commands/research.js +65 -0
  664. package/dist/repl/commands/research.js.map +1 -0
  665. package/dist/repl/commands/search.d.ts +5 -0
  666. package/dist/repl/commands/search.d.ts.map +1 -0
  667. package/dist/repl/commands/search.js +74 -0
  668. package/dist/repl/commands/search.js.map +1 -0
  669. package/dist/repl/commands/types.d.ts +9 -0
  670. package/dist/repl/commands/types.d.ts.map +1 -0
  671. package/dist/repl/commands/types.js +1 -0
  672. package/dist/repl/commands/types.js.map +1 -0
  673. package/dist/repl/formatters.d.ts +13 -0
  674. package/dist/repl/formatters.d.ts.map +1 -0
  675. package/dist/repl/formatters.js +283 -0
  676. package/dist/repl/formatters.js.map +1 -0
  677. package/dist/repl/parser.d.ts +9 -0
  678. package/dist/repl/parser.d.ts.map +1 -0
  679. package/dist/repl/parser.js +86 -0
  680. package/dist/repl/parser.js.map +1 -0
  681. package/dist/repl/shell.d.ts +8 -0
  682. package/dist/repl/shell.d.ts.map +1 -0
  683. package/dist/repl/shell.js +184 -0
  684. package/dist/repl/shell.js.map +1 -0
  685. package/dist/research/branch-exploration.d.ts +14 -0
  686. package/dist/research/branch-exploration.d.ts.map +1 -0
  687. package/dist/research/branch-exploration.js +100 -0
  688. package/dist/research/branch-exploration.js.map +1 -0
  689. package/dist/research/brief.d.ts +6 -0
  690. package/dist/research/brief.d.ts.map +1 -0
  691. package/dist/research/brief.js +246 -0
  692. package/dist/research/brief.js.map +1 -0
  693. package/dist/research/citation-graph.d.ts +9 -0
  694. package/dist/research/citation-graph.d.ts.map +1 -0
  695. package/dist/research/citation-graph.js +114 -0
  696. package/dist/research/citation-graph.js.map +1 -0
  697. package/dist/research/decompose.d.ts +14 -0
  698. package/dist/research/decompose.d.ts.map +1 -0
  699. package/dist/research/decompose.js +439 -0
  700. package/dist/research/decompose.js.map +1 -0
  701. package/dist/research/pipeline.d.ts +5 -0
  702. package/dist/research/pipeline.d.ts.map +1 -0
  703. package/dist/research/pipeline.js +269 -0
  704. package/dist/research/pipeline.js.map +1 -0
  705. package/dist/research/synthesis-local.d.ts +19 -0
  706. package/dist/research/synthesis-local.d.ts.map +1 -0
  707. package/dist/research/synthesis-local.js +62 -0
  708. package/dist/research/synthesis-local.js.map +1 -0
  709. package/dist/research/synthesize.d.ts +10 -0
  710. package/dist/research/synthesize.d.ts.map +1 -0
  711. package/dist/research/synthesize.js +137 -0
  712. package/dist/research/synthesize.js.map +1 -0
  713. package/dist/search/answer-synthesis.d.ts +33 -0
  714. package/dist/search/answer-synthesis.d.ts.map +1 -0
  715. package/dist/search/answer-synthesis.js +244 -0
  716. package/dist/search/answer-synthesis.js.map +1 -0
  717. package/dist/search/context-formatter.d.ts +3 -0
  718. package/dist/search/context-formatter.d.ts.map +1 -0
  719. package/dist/search/context-formatter.js +56 -0
  720. package/dist/search/context-formatter.js.map +1 -0
  721. package/dist/search/dedup.d.ts +1 -0
  722. package/dist/search/dedup.d.ts.map +1 -1
  723. package/dist/search/dedup.js +40 -32
  724. package/dist/search/dedup.js.map +1 -1
  725. package/dist/search/engines/arxiv.d.ts +7 -0
  726. package/dist/search/engines/arxiv.d.ts.map +1 -0
  727. package/dist/search/engines/arxiv.js +70 -0
  728. package/dist/search/engines/arxiv.js.map +1 -0
  729. package/dist/search/engines/bing-news.d.ts +7 -0
  730. package/dist/search/engines/bing-news.d.ts.map +1 -0
  731. package/dist/search/engines/bing-news.js +97 -0
  732. package/dist/search/engines/bing-news.js.map +1 -0
  733. package/dist/search/engines/bing.d.ts +1 -0
  734. package/dist/search/engines/bing.d.ts.map +1 -1
  735. package/dist/search/engines/bing.js +100 -44
  736. package/dist/search/engines/bing.js.map +1 -1
  737. package/dist/search/engines/devdocs.d.ts +6 -0
  738. package/dist/search/engines/devdocs.d.ts.map +1 -0
  739. package/dist/search/engines/devdocs.js +56 -0
  740. package/dist/search/engines/devdocs.js.map +1 -0
  741. package/dist/search/engines/duckduckgo.d.ts.map +1 -1
  742. package/dist/search/engines/duckduckgo.js +56 -44
  743. package/dist/search/engines/duckduckgo.js.map +1 -1
  744. package/dist/search/engines/github-code.d.ts +7 -0
  745. package/dist/search/engines/github-code.d.ts.map +1 -0
  746. package/dist/search/engines/github-code.js +55 -0
  747. package/dist/search/engines/github-code.js.map +1 -0
  748. package/dist/search/engines/hn-algolia.d.ts +7 -0
  749. package/dist/search/engines/hn-algolia.d.ts.map +1 -0
  750. package/dist/search/engines/hn-algolia.js +76 -0
  751. package/dist/search/engines/hn-algolia.js.map +1 -0
  752. package/dist/search/engines/lobsters.d.ts +7 -0
  753. package/dist/search/engines/lobsters.d.ts.map +1 -0
  754. package/dist/search/engines/lobsters.js +83 -0
  755. package/dist/search/engines/lobsters.js.map +1 -0
  756. package/dist/search/engines/mdn.d.ts +7 -0
  757. package/dist/search/engines/mdn.d.ts.map +1 -0
  758. package/dist/search/engines/mdn.js +48 -0
  759. package/dist/search/engines/mdn.js.map +1 -0
  760. package/dist/search/engines/semantic-scholar.d.ts +7 -0
  761. package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
  762. package/dist/search/engines/semantic-scholar.js +69 -0
  763. package/dist/search/engines/semantic-scholar.js.map +1 -0
  764. package/dist/search/engines/stackoverflow.d.ts +7 -0
  765. package/dist/search/engines/stackoverflow.d.ts.map +1 -0
  766. package/dist/search/engines/stackoverflow.js +73 -0
  767. package/dist/search/engines/stackoverflow.js.map +1 -0
  768. package/dist/search/engines/startpage.d.ts.map +1 -1
  769. package/dist/search/engines/startpage.js +65 -46
  770. package/dist/search/engines/startpage.js.map +1 -1
  771. package/dist/search/evidence.d.ts +25 -0
  772. package/dist/search/evidence.d.ts.map +1 -0
  773. package/dist/search/evidence.js +220 -0
  774. package/dist/search/evidence.js.map +1 -0
  775. package/dist/search/filters.d.ts.map +1 -1
  776. package/dist/search/filters.js +58 -54
  777. package/dist/search/filters.js.map +1 -1
  778. package/dist/search/find-similar/crawl-rank.d.ts +9 -0
  779. package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
  780. package/dist/search/find-similar/crawl-rank.js +272 -0
  781. package/dist/search/find-similar/crawl-rank.js.map +1 -0
  782. package/dist/search/find-similar/mode.d.ts +4 -0
  783. package/dist/search/find-similar/mode.d.ts.map +1 -0
  784. package/dist/search/find-similar/mode.js +12 -0
  785. package/dist/search/find-similar/mode.js.map +1 -0
  786. package/dist/search/find-similar.d.ts +5 -0
  787. package/dist/search/find-similar.d.ts.map +1 -0
  788. package/dist/search/find-similar.js +509 -0
  789. package/dist/search/find-similar.js.map +1 -0
  790. package/dist/search/highlights.d.ts +19 -0
  791. package/dist/search/highlights.d.ts.map +1 -0
  792. package/dist/search/highlights.js +167 -0
  793. package/dist/search/highlights.js.map +1 -0
  794. package/dist/search/language-filter.d.ts +29 -0
  795. package/dist/search/language-filter.d.ts.map +1 -0
  796. package/dist/search/language-filter.js +126 -0
  797. package/dist/search/language-filter.js.map +1 -0
  798. package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
  799. package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
  800. package/dist/search/legacy/searxng-orchestrator.js +501 -0
  801. package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
  802. package/dist/search/legacy/searxng-provider.d.ts +7 -0
  803. package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
  804. package/dist/search/legacy/searxng-provider.js +11 -0
  805. package/dist/search/legacy/searxng-provider.js.map +1 -0
  806. package/dist/search/multi-query.d.ts +25 -0
  807. package/dist/search/multi-query.d.ts.map +1 -0
  808. package/dist/search/multi-query.js +228 -0
  809. package/dist/search/multi-query.js.map +1 -0
  810. package/dist/search/query.js +32 -34
  811. package/dist/search/query.js.map +1 -1
  812. package/dist/search/rerank.d.ts +3 -1
  813. package/dist/search/rerank.d.ts.map +1 -1
  814. package/dist/search/rerank.js +44 -35
  815. package/dist/search/rerank.js.map +1 -1
  816. package/dist/search/reranker/authority-boost.d.ts +3 -0
  817. package/dist/search/reranker/authority-boost.d.ts.map +1 -0
  818. package/dist/search/reranker/authority-boost.js +179 -0
  819. package/dist/search/reranker/authority-boost.js.map +1 -0
  820. package/dist/search/reranker/consensus-boost.d.ts +3 -0
  821. package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
  822. package/dist/search/reranker/consensus-boost.js +27 -0
  823. package/dist/search/reranker/consensus-boost.js.map +1 -0
  824. package/dist/search/reranker/recency-boost.d.ts +3 -0
  825. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  826. package/dist/search/reranker/recency-boost.js +13 -0
  827. package/dist/search/reranker/recency-boost.js.map +1 -0
  828. package/dist/search/reranker/recency.d.ts +3 -0
  829. package/dist/search/reranker/recency.d.ts.map +1 -0
  830. package/dist/search/reranker/recency.js +23 -0
  831. package/dist/search/reranker/recency.js.map +1 -0
  832. package/dist/search/reranker/transformers-rerank-provider.d.ts +13 -0
  833. package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
  834. package/dist/search/reranker/transformers-rerank-provider.js +94 -0
  835. package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
  836. package/dist/search/rrf.d.ts +17 -0
  837. package/dist/search/rrf.d.ts.map +1 -0
  838. package/dist/search/rrf.js +39 -0
  839. package/dist/search/rrf.js.map +1 -0
  840. package/dist/search/sampling.d.ts +25 -0
  841. package/dist/search/sampling.d.ts.map +1 -0
  842. package/dist/search/sampling.js +52 -0
  843. package/dist/search/sampling.js.map +1 -0
  844. package/dist/search/searxng.d.ts.map +1 -1
  845. package/dist/search/searxng.js +69 -79
  846. package/dist/search/searxng.js.map +1 -1
  847. package/dist/search/tokens.d.ts +3 -0
  848. package/dist/search/tokens.d.ts.map +1 -0
  849. package/dist/search/tokens.js +39 -0
  850. package/dist/search/tokens.js.map +1 -0
  851. package/dist/search/truncate.d.ts +6 -0
  852. package/dist/search/truncate.d.ts.map +1 -0
  853. package/dist/search/truncate.js +26 -0
  854. package/dist/search/truncate.js.map +1 -0
  855. package/dist/search/url-unwrap.d.ts +3 -0
  856. package/dist/search/url-unwrap.d.ts.map +1 -0
  857. package/dist/search/url-unwrap.js +43 -0
  858. package/dist/search/url-unwrap.js.map +1 -0
  859. package/dist/search/v1/context-rank.d.ts +13 -0
  860. package/dist/search/v1/context-rank.d.ts.map +1 -0
  861. package/dist/search/v1/context-rank.js +74 -0
  862. package/dist/search/v1/context-rank.js.map +1 -0
  863. package/dist/search/v1/engine-base.d.ts +27 -0
  864. package/dist/search/v1/engine-base.d.ts.map +1 -0
  865. package/dist/search/v1/engine-base.js +110 -0
  866. package/dist/search/v1/engine-base.js.map +1 -0
  867. package/dist/search/v1/intent-router.d.ts +22 -0
  868. package/dist/search/v1/intent-router.d.ts.map +1 -0
  869. package/dist/search/v1/intent-router.js +138 -0
  870. package/dist/search/v1/intent-router.js.map +1 -0
  871. package/dist/search/v1/orchestrator.d.ts +24 -0
  872. package/dist/search/v1/orchestrator.d.ts.map +1 -0
  873. package/dist/search/v1/orchestrator.js +163 -0
  874. package/dist/search/v1/orchestrator.js.map +1 -0
  875. package/dist/search/v1/recency-boost.d.ts +9 -0
  876. package/dist/search/v1/recency-boost.d.ts.map +1 -0
  877. package/dist/search/v1/recency-boost.js +37 -0
  878. package/dist/search/v1/recency-boost.js.map +1 -0
  879. package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
  880. package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
  881. package/dist/search/v1/recent-cache-dedup.js +85 -0
  882. package/dist/search/v1/recent-cache-dedup.js.map +1 -0
  883. package/dist/search/v1/rss/feed-config.d.ts +21 -0
  884. package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
  885. package/dist/search/v1/rss/feed-config.js +90 -0
  886. package/dist/search/v1/rss/feed-config.js.map +1 -0
  887. package/dist/search/v1/rss/feed-parser.d.ts +14 -0
  888. package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
  889. package/dist/search/v1/rss/feed-parser.js +104 -0
  890. package/dist/search/v1/rss/feed-parser.js.map +1 -0
  891. package/dist/search/v1/rss/feed-poller.d.ts +22 -0
  892. package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
  893. package/dist/search/v1/rss/feed-poller.js +102 -0
  894. package/dist/search/v1/rss/feed-poller.js.map +1 -0
  895. package/dist/search/v1/rss/feed-store.d.ts +30 -0
  896. package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
  897. package/dist/search/v1/rss/feed-store.js +134 -0
  898. package/dist/search/v1/rss/feed-store.js.map +1 -0
  899. package/dist/search/v1/rss/rss-engine.d.ts +6 -0
  900. package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
  901. package/dist/search/v1/rss/rss-engine.js +28 -0
  902. package/dist/search/v1/rss/rss-engine.js.map +1 -0
  903. package/dist/search/v1/v1-provider.d.ts +7 -0
  904. package/dist/search/v1/v1-provider.d.ts.map +1 -0
  905. package/dist/search/v1/v1-provider.js +68 -0
  906. package/dist/search/v1/v1-provider.js.map +1 -0
  907. package/dist/search/v1/verticals/code.d.ts +4 -0
  908. package/dist/search/v1/verticals/code.d.ts.map +1 -0
  909. package/dist/search/v1/verticals/code.js +20 -0
  910. package/dist/search/v1/verticals/code.js.map +1 -0
  911. package/dist/search/v1/verticals/docs.d.ts +4 -0
  912. package/dist/search/v1/verticals/docs.d.ts.map +1 -0
  913. package/dist/search/v1/verticals/docs.js +20 -0
  914. package/dist/search/v1/verticals/docs.js.map +1 -0
  915. package/dist/search/v1/verticals/general.d.ts +4 -0
  916. package/dist/search/v1/verticals/general.d.ts.map +1 -0
  917. package/dist/search/v1/verticals/general.js +22 -0
  918. package/dist/search/v1/verticals/general.js.map +1 -0
  919. package/dist/search/v1/verticals/news.d.ts +10 -0
  920. package/dist/search/v1/verticals/news.d.ts.map +1 -0
  921. package/dist/search/v1/verticals/news.js +52 -0
  922. package/dist/search/v1/verticals/news.js.map +1 -0
  923. package/dist/search/v1/verticals/papers.d.ts +4 -0
  924. package/dist/search/v1/verticals/papers.d.ts.map +1 -0
  925. package/dist/search/v1/verticals/papers.js +23 -0
  926. package/dist/search/v1/verticals/papers.js.map +1 -0
  927. package/dist/search/validator.js +31 -31
  928. package/dist/search/validator.js.map +1 -1
  929. package/dist/searxng/bootstrap.d.ts +30 -0
  930. package/dist/searxng/bootstrap.d.ts.map +1 -1
  931. package/dist/searxng/bootstrap.js +223 -85
  932. package/dist/searxng/bootstrap.js.map +1 -1
  933. package/dist/searxng/docker.d.ts.map +1 -1
  934. package/dist/searxng/docker.js +69 -60
  935. package/dist/searxng/docker.js.map +1 -1
  936. package/dist/searxng/process.d.ts +13 -1
  937. package/dist/searxng/process.d.ts.map +1 -1
  938. package/dist/searxng/process.js +231 -164
  939. package/dist/searxng/process.js.map +1 -1
  940. package/dist/server/backend-status.d.ts +13 -0
  941. package/dist/server/backend-status.d.ts.map +1 -0
  942. package/dist/server/backend-status.js +40 -0
  943. package/dist/server/backend-status.js.map +1 -0
  944. package/dist/server/tool-schemas.d.ts +549 -0
  945. package/dist/server/tool-schemas.d.ts.map +1 -0
  946. package/dist/server/tool-schemas.js +464 -0
  947. package/dist/server/tool-schemas.js.map +1 -0
  948. package/dist/server/warmup-on-start.d.ts +9 -0
  949. package/dist/server/warmup-on-start.d.ts.map +1 -0
  950. package/dist/server/warmup-on-start.js +55 -0
  951. package/dist/server/warmup-on-start.js.map +1 -0
  952. package/dist/server.d.ts +17 -0
  953. package/dist/server.d.ts.map +1 -1
  954. package/dist/server.js +454 -297
  955. package/dist/server.js.map +1 -1
  956. package/dist/tools/agent.d.ts +5 -0
  957. package/dist/tools/agent.d.ts.map +1 -0
  958. package/dist/tools/agent.js +128 -0
  959. package/dist/tools/agent.js.map +1 -0
  960. package/dist/tools/cache.d.ts +2 -1
  961. package/dist/tools/cache.d.ts.map +1 -1
  962. package/dist/tools/cache.js +177 -44
  963. package/dist/tools/cache.js.map +1 -1
  964. package/dist/tools/crawl.d.ts.map +1 -1
  965. package/dist/tools/crawl.js +171 -88
  966. package/dist/tools/crawl.js.map +1 -1
  967. package/dist/tools/extract.d.ts +2 -2
  968. package/dist/tools/extract.d.ts.map +1 -1
  969. package/dist/tools/extract.js +175 -59
  970. package/dist/tools/extract.js.map +1 -1
  971. package/dist/tools/fetch.d.ts +2 -2
  972. package/dist/tools/fetch.d.ts.map +1 -1
  973. package/dist/tools/fetch.js +174 -68
  974. package/dist/tools/fetch.js.map +1 -1
  975. package/dist/tools/find-similar.d.ts +5 -0
  976. package/dist/tools/find-similar.d.ts.map +1 -0
  977. package/dist/tools/find-similar.js +127 -0
  978. package/dist/tools/find-similar.js.map +1 -0
  979. package/dist/tools/research.d.ts +5 -0
  980. package/dist/tools/research.d.ts.map +1 -0
  981. package/dist/tools/research.js +107 -0
  982. package/dist/tools/research.js.map +1 -0
  983. package/dist/tools/search.d.ts +10 -2
  984. package/dist/tools/search.d.ts.map +1 -1
  985. package/dist/tools/search.js +13 -158
  986. package/dist/tools/search.js.map +1 -1
  987. package/dist/types.d.ts +350 -7
  988. package/dist/types.d.ts.map +1 -1
  989. package/dist/types.js +6 -1
  990. package/dist/types.js.map +1 -1
  991. package/dist/util/mode.d.ts +4 -0
  992. package/dist/util/mode.d.ts.map +1 -0
  993. package/dist/util/mode.js +34 -0
  994. package/dist/util/mode.js.map +1 -0
  995. package/package.json +78 -8
  996. package/dist/extraction/trafilatura.d.ts +0 -6
  997. package/dist/extraction/trafilatura.d.ts.map +0 -1
  998. package/dist/extraction/trafilatura.js +0 -105
  999. package/dist/extraction/trafilatura.js.map +0 -1
  1000. package/dist/search/flashrank.d.ts +0 -12
  1001. package/dist/search/flashrank.d.ts.map +0 -1
  1002. package/dist/search/flashrank.js +0 -63
  1003. package/dist/search/flashrank.js.map +0 -1
@@ -1,93 +1,136 @@
1
- import { createHash } from 'node:crypto';
2
- import { getDatabase } from '../cache/db.js';
3
- export function splitIntoBlocks(markdown) {
4
- if (!markdown.trim())
5
- return [];
6
- const lines = markdown.split('\n');
7
- const headingIndices = [];
8
- for (let i = 0; i < lines.length; i++) {
9
- const match = lines[i].match(/^(#{1,6})\s+/);
10
- if (match) {
11
- headingIndices.push({ level: match[1].length, lineIdx: i });
12
- }
1
+ import { createHash } from "node:crypto";
2
+ import { getDatabase } from "../cache/db.js";
3
+ function splitIntoBlocks(markdown) {
4
+ if (!markdown.trim()) return [];
5
+ const lines = markdown.split("\n");
6
+ const headingIndices = [];
7
+ for (let i = 0; i < lines.length; i++) {
8
+ const match = lines[i].match(/^(#{1,6})\s+/);
9
+ if (match) {
10
+ headingIndices.push({ level: match[1].length, lineIdx: i });
13
11
  }
14
- // If no headings, split by double-newline (paragraph blocks)
15
- if (headingIndices.length === 0) {
16
- return markdown.split(/\n\n+/).map((b) => b.trim()).filter(Boolean);
17
- }
18
- // Non-overlapping split: each heading starts a new block, ending at the next heading of ANY level
19
- const blocks = [];
20
- for (let i = 0; i < headingIndices.length; i++) {
21
- const start = headingIndices[i].lineIdx;
22
- const end = i + 1 < headingIndices.length ? headingIndices[i + 1].lineIdx : lines.length;
23
- blocks.push(lines.slice(start, end).join('\n').trim());
24
- }
25
- return blocks.filter(Boolean);
12
+ }
13
+ if (headingIndices.length === 0) {
14
+ return markdown.split(/\n\n+/).map((b) => b.trim()).filter(Boolean);
15
+ }
16
+ const blocks = [];
17
+ for (let i = 0; i < headingIndices.length; i++) {
18
+ const start = headingIndices[i].lineIdx;
19
+ const end = i + 1 < headingIndices.length ? headingIndices[i + 1].lineIdx : lines.length;
20
+ blocks.push(lines.slice(start, end).join("\n").trim());
21
+ }
22
+ return blocks.filter(Boolean);
26
23
  }
27
- export function normalizeBlockText(text) {
28
- return text.toLowerCase().replace(/\s+/g, ' ').trim();
24
+ function normalizeBlockText(text) {
25
+ return text.toLowerCase().replace(/\s+/g, " ").trim();
29
26
  }
30
27
  function hashBlock(text) {
31
- return createHash('sha256').update(normalizeBlockText(text)).digest('hex');
28
+ return createHash("sha256").update(normalizeBlockText(text)).digest("hex");
29
+ }
30
+ const NAV_DEDUPE_THRESHOLD = 0.6;
31
+ const MAX_LEADING_LINES = 30;
32
+ const MAX_TRAILING_LINES = 20;
33
+ const MIN_CORPUS = 4;
34
+ function lineHash(line) {
35
+ return createHash("sha1").update(line.trim().toLowerCase()).digest("hex");
32
36
  }
33
- export function deduplicatePages(pages, domain) {
34
- if (pages.length <= 1)
35
- return pages.map((p) => ({ url: p.url, markdown: p.markdown }));
36
- // Pre-load stored boilerplate hashes for this domain
37
- const storedHashes = domain ? getStoredBoilerplate(domain) : [];
38
- const boilerplateHashes = new Set(storedHashes);
39
- // Split each page into blocks and hash them
40
- const pageBlocks = pages.map((page) => ({
41
- url: page.url,
42
- blocks: splitIntoBlocks(page.markdown),
43
- }));
44
- // Count how many pages each block hash appears in
45
- const hashPageCount = new Map();
46
- for (const page of pageBlocks) {
47
- const seenHashes = new Set();
48
- for (const block of page.blocks) {
49
- const h = hashBlock(block);
50
- if (!seenHashes.has(h)) {
51
- seenHashes.add(h);
52
- hashPageCount.set(h, (hashPageCount.get(h) ?? 0) + 1);
53
- }
54
- }
37
+ function stripRepeatedNavigationLines(pages) {
38
+ if (pages.length < MIN_CORPUS) return pages;
39
+ const lineSets = pages.map((p) => p.markdown.split("\n"));
40
+ const countLeading = /* @__PURE__ */ new Map();
41
+ const countTrailing = /* @__PURE__ */ new Map();
42
+ for (const lines of lineSets) {
43
+ const seenL = /* @__PURE__ */ new Set();
44
+ for (let i = 0; i < Math.min(MAX_LEADING_LINES, lines.length); i++) {
45
+ const h = lineHash(lines[i]);
46
+ if (!seenL.has(h)) {
47
+ seenL.add(h);
48
+ countLeading.set(h, (countLeading.get(h) ?? 0) + 1);
49
+ }
55
50
  }
56
- // Mark hashes appearing in >50% of pages as boilerplate
57
- const threshold = pages.length / 2;
58
- for (const [hash, count] of hashPageCount) {
59
- if (count > threshold) {
60
- boilerplateHashes.add(hash);
61
- }
51
+ const seenT = /* @__PURE__ */ new Set();
52
+ for (let i = lines.length - 1; i >= Math.max(lines.length - MAX_TRAILING_LINES, 0); i--) {
53
+ const h = lineHash(lines[i]);
54
+ if (!seenT.has(h)) {
55
+ seenT.add(h);
56
+ countTrailing.set(h, (countTrailing.get(h) ?? 0) + 1);
57
+ }
62
58
  }
63
- // Store updated boilerplate hashes for this domain
64
- if (domain) {
65
- storeBoilerplate(domain, Array.from(boilerplateHashes));
59
+ }
60
+ const threshold = pages.length * NAV_DEDUPE_THRESHOLD;
61
+ const navLeading = new Set([...countLeading].filter(([, c]) => c >= threshold).map(([h]) => h));
62
+ const navTrailing = new Set([...countTrailing].filter(([, c]) => c >= threshold).map(([h]) => h));
63
+ return pages.map((page, i) => {
64
+ const lines = lineSets[i];
65
+ let head = 0;
66
+ while (head < lines.length && (lines[head].trim() === "" || navLeading.has(lineHash(lines[head])))) head++;
67
+ let tail = lines.length;
68
+ while (tail > head && (lines[tail - 1].trim() === "" || navTrailing.has(lineHash(lines[tail - 1])))) tail--;
69
+ return { url: page.url, markdown: lines.slice(head, tail).join("\n") };
70
+ });
71
+ }
72
+ function deduplicatePages(pages, domain) {
73
+ if (pages.length <= 1) return pages.map((p) => ({ url: p.url, markdown: p.markdown }));
74
+ const stripped = stripRepeatedNavigationLines(pages);
75
+ const storedHashes = domain ? getStoredBoilerplate(domain) : [];
76
+ const boilerplateHashes = new Set(storedHashes);
77
+ const pageBlocks = stripped.map((page) => ({
78
+ url: page.url,
79
+ blocks: splitIntoBlocks(page.markdown)
80
+ }));
81
+ const hashPageCount = /* @__PURE__ */ new Map();
82
+ for (const page of pageBlocks) {
83
+ const seenHashes = /* @__PURE__ */ new Set();
84
+ for (const block of page.blocks) {
85
+ const h = hashBlock(block);
86
+ if (!seenHashes.has(h)) {
87
+ seenHashes.add(h);
88
+ hashPageCount.set(h, (hashPageCount.get(h) ?? 0) + 1);
89
+ }
90
+ }
91
+ }
92
+ const threshold = pages.length / 2;
93
+ for (const [hash, count] of hashPageCount) {
94
+ if (count > threshold) {
95
+ boilerplateHashes.add(hash);
66
96
  }
67
- // Strip boilerplate blocks from each page
68
- return pageBlocks.map((page) => {
69
- const filtered = page.blocks.filter((block) => !boilerplateHashes.has(hashBlock(block)));
70
- return {
71
- url: page.url,
72
- markdown: filtered.join('\n\n'),
73
- };
74
- });
97
+ }
98
+ if (domain) {
99
+ storeBoilerplate(domain, Array.from(boilerplateHashes));
100
+ }
101
+ return pageBlocks.map((page) => {
102
+ const filtered = page.blocks.filter((block) => !boilerplateHashes.has(hashBlock(block)));
103
+ return {
104
+ url: page.url,
105
+ markdown: filtered.join("\n\n")
106
+ };
107
+ });
75
108
  }
76
- export function getStoredBoilerplate(domain) {
77
- const db = getDatabase();
78
- const rows = db.prepare('SELECT block_hash FROM domain_boilerplate WHERE domain = ?').all(domain);
79
- return rows.map(r => r.block_hash);
109
+ function getStoredBoilerplate(domain) {
110
+ const db = getDatabase();
111
+ const rows = db.prepare("SELECT block_hash FROM domain_boilerplate WHERE domain = ?").all(domain);
112
+ return rows.map((r) => r.block_hash);
80
113
  }
81
- export function storeBoilerplate(domain, hashes) {
82
- const db = getDatabase();
83
- const del = db.prepare('DELETE FROM domain_boilerplate WHERE domain = ?');
84
- const insert = db.prepare('INSERT OR IGNORE INTO domain_boilerplate (domain, block_hash, sample_text) VALUES (?, ?, ?)');
85
- const tx = db.transaction((items) => {
86
- del.run(domain);
87
- for (const hash of items) {
88
- insert.run(domain, hash, null);
89
- }
90
- });
91
- tx(hashes);
114
+ function storeBoilerplate(domain, hashes) {
115
+ const db = getDatabase();
116
+ const del = db.prepare("DELETE FROM domain_boilerplate WHERE domain = ?");
117
+ const insert = db.prepare(
118
+ "INSERT OR IGNORE INTO domain_boilerplate (domain, block_hash, sample_text) VALUES (?, ?, ?)"
119
+ );
120
+ const tx = db.transaction((items) => {
121
+ del.run(domain);
122
+ for (const hash of items) {
123
+ insert.run(domain, hash, null);
124
+ }
125
+ });
126
+ tx(hashes);
92
127
  }
128
+ export {
129
+ deduplicatePages,
130
+ getStoredBoilerplate,
131
+ normalizeBlockText,
132
+ splitIntoBlocks,
133
+ storeBoilerplate,
134
+ stripRepeatedNavigationLines
135
+ };
93
136
  //# sourceMappingURL=dedup.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"dedup.js","sourceRoot":"","sources":["../../src/crawl/dedup.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE7C,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC9C,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,cAAc,GAAyC,EAAE,CAAC;IAEhE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC7C,IAAI,KAAK,EAAE,CAAC;YACV,cAAc,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACtE,CAAC;IAED,kGAAkG;IAClG,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,MAAM,KAAK,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;QACxC,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;QACzF,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AACxD,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC7E,CAAC;AAYD,MAAM,UAAU,gBAAgB,CAAC,KAAkB,EAAE,MAAe;IAClE,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;IAEvF,qDAAqD;IACrD,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,oBAAoB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAChE,MAAM,iBA
AiB,GAAG,IAAI,GAAG,CAAS,YAAY,CAAC,CAAC;IAExD,4CAA4C;IAC5C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACtC,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,MAAM,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC;KACvC,CAAC,CAAC,CAAC;IAEJ,kDAAkD;IAClD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAC;IAChD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QACrC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;YAC3B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;gBACvB,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClB,aAAa,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACxD,CAAC;QACH,CAAC;IACH,CAAC;IAED,wDAAwD;IACxD,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;IACnC,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,aAAa,EAAE,CAAC;QAC1C,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;YACtB,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,mDAAmD;IACnD,IAAI,MAAM,EAAE,CAAC;QACX,gBAAgB,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC;IAC1D,CAAC;IAED,0CAA0C;IAC1C,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,iBAAiB,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACzF,OAAO;YACL,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,QAAQ,EAAE,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;SAChC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,MAAc;IACjD,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,4DAA4D,CAAC,CAAC,GAAG,CAAC,MAAM,CAA6B,CAAC;IAC9H,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,MAAc,EAAE,MAAgB;IAC/D,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,iDAAiD,CAAC,CAAC;IAC1E,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CACvB,6FAA6F,CAC9F,CAAC;IACF,MAAM,EAAE,GAAG,EAAE,CAAC,WAAW,CAAC,CAAC,KAAe,EAAE,EAAE;QAC5C,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;QA
CjC,CAAC;IACH,CAAC,CAAC,CAAC;IACH,EAAE,CAAC,MAAM,CAAC,CAAC;AACb,CAAC"}
1
+ {"version":3,"sources":["../../src/crawl/dedup.ts"],"sourcesContent":["import { createHash } from 'node:crypto';\nimport { getDatabase } from '../cache/db.js';\n\nexport function splitIntoBlocks(markdown: string): string[] {\n if (!markdown.trim()) return [];\n\n const lines = markdown.split('\\n');\n const headingIndices: { level: number; lineIdx: number }[] = [];\n\n for (let i = 0; i < lines.length; i++) {\n const match = lines[i].match(/^(#{1,6})\\s+/);\n if (match) {\n headingIndices.push({ level: match[1].length, lineIdx: i });\n }\n }\n\n // If no headings, split by double-newline (paragraph blocks)\n if (headingIndices.length === 0) {\n return markdown.split(/\\n\\n+/).map((b) => b.trim()).filter(Boolean);\n }\n\n // Non-overlapping split: each heading starts a new block, ending at the next heading of ANY level\n const blocks: string[] = [];\n for (let i = 0; i < headingIndices.length; i++) {\n const start = headingIndices[i].lineIdx;\n const end = i + 1 < headingIndices.length ? 
headingIndices[i + 1].lineIdx : lines.length;\n blocks.push(lines.slice(start, end).join('\\n').trim());\n }\n\n return blocks.filter(Boolean);\n}\n\nexport function normalizeBlockText(text: string): string {\n return text.toLowerCase().replace(/\\s+/g, ' ').trim();\n}\n\nfunction hashBlock(text: string): string {\n return createHash('sha256').update(normalizeBlockText(text)).digest('hex');\n}\n\ninterface PageInput {\n url: string;\n markdown: string;\n}\n\ninterface PageOutput {\n url: string;\n markdown: string;\n}\n\nconst NAV_DEDUPE_THRESHOLD = 0.6;\nconst MAX_LEADING_LINES = 30;\nconst MAX_TRAILING_LINES = 20;\nconst MIN_CORPUS = 4;\n\nfunction lineHash(line: string): string {\n return createHash('sha1').update(line.trim().toLowerCase()).digest('hex');\n}\n\nexport function stripRepeatedNavigationLines(pages: PageInput[]): PageInput[] {\n if (pages.length < MIN_CORPUS) return pages;\n const lineSets = pages.map((p) => p.markdown.split('\\n'));\n\n const countLeading = new Map<string, number>();\n const countTrailing = new Map<string, number>();\n for (const lines of lineSets) {\n const seenL = new Set<string>();\n for (let i = 0; i < Math.min(MAX_LEADING_LINES, lines.length); i++) {\n const h = lineHash(lines[i]);\n if (!seenL.has(h)) {\n seenL.add(h);\n countLeading.set(h, (countLeading.get(h) ?? 0) + 1);\n }\n }\n const seenT = new Set<string>();\n for (let i = lines.length - 1; i >= Math.max(lines.length - MAX_TRAILING_LINES, 0); i--) {\n const h = lineHash(lines[i]);\n if (!seenT.has(h)) {\n seenT.add(h);\n countTrailing.set(h, (countTrailing.get(h) ?? 
0) + 1);\n }\n }\n }\n\n const threshold = pages.length * NAV_DEDUPE_THRESHOLD;\n const navLeading = new Set([...countLeading].filter(([, c]) => c >= threshold).map(([h]) => h));\n const navTrailing = new Set([...countTrailing].filter(([, c]) => c >= threshold).map(([h]) => h));\n\n return pages.map((page, i) => {\n const lines = lineSets[i];\n let head = 0;\n while (head < lines.length && (lines[head].trim() === '' || navLeading.has(lineHash(lines[head])))) head++;\n let tail = lines.length;\n while (tail > head && (lines[tail - 1].trim() === '' || navTrailing.has(lineHash(lines[tail - 1])))) tail--;\n return { url: page.url, markdown: lines.slice(head, tail).join('\\n') };\n });\n}\n\nexport function deduplicatePages(pages: PageInput[], domain?: string): PageOutput[] {\n if (pages.length <= 1) return pages.map((p) => ({ url: p.url, markdown: p.markdown }));\n\n const stripped = stripRepeatedNavigationLines(pages);\n\n // Pre-load stored boilerplate hashes for this domain\n const storedHashes = domain ? getStoredBoilerplate(domain) : [];\n const boilerplateHashes = new Set<string>(storedHashes);\n\n // Split each page into blocks and hash them\n const pageBlocks = stripped.map((page) => ({\n url: page.url,\n blocks: splitIntoBlocks(page.markdown),\n }));\n\n // Count how many pages each block hash appears in\n const hashPageCount = new Map<string, number>();\n for (const page of pageBlocks) {\n const seenHashes = new Set<string>();\n for (const block of page.blocks) {\n const h = hashBlock(block);\n if (!seenHashes.has(h)) {\n seenHashes.add(h);\n hashPageCount.set(h, (hashPageCount.get(h) ?? 
0) + 1);\n }\n }\n }\n\n // Mark hashes appearing in >50% of pages as boilerplate\n const threshold = pages.length / 2;\n for (const [hash, count] of hashPageCount) {\n if (count > threshold) {\n boilerplateHashes.add(hash);\n }\n }\n\n // Store updated boilerplate hashes for this domain\n if (domain) {\n storeBoilerplate(domain, Array.from(boilerplateHashes));\n }\n\n // Strip boilerplate blocks from each page\n return pageBlocks.map((page) => {\n const filtered = page.blocks.filter((block) => !boilerplateHashes.has(hashBlock(block)));\n return {\n url: page.url,\n markdown: filtered.join('\\n\\n'),\n };\n });\n}\n\nexport function getStoredBoilerplate(domain: string): string[] {\n const db = getDatabase();\n const rows = db.prepare('SELECT block_hash FROM domain_boilerplate WHERE domain = ?').all(domain) as { block_hash: string }[];\n return rows.map(r => r.block_hash);\n}\n\nexport function storeBoilerplate(domain: string, hashes: string[]): void {\n const db = getDatabase();\n const del = db.prepare('DELETE FROM domain_boilerplate WHERE domain = ?');\n const insert = db.prepare(\n 'INSERT OR IGNORE INTO domain_boilerplate (domain, block_hash, sample_text) VALUES (?, ?, ?)',\n );\n const tx = db.transaction((items: string[]) => {\n del.run(domain);\n for (const hash of items) {\n insert.run(domain, hash, null);\n }\n });\n 
tx(hashes);\n}\n"],"mappings":"AAAA,SAAS,kBAAkB;AAC3B,SAAS,mBAAmB;AAErB,SAAS,gBAAgB,UAA4B;AAC1D,MAAI,CAAC,SAAS,KAAK,EAAG,QAAO,CAAC;AAE9B,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,QAAM,iBAAuD,CAAC;AAE9D,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,QAAQ,MAAM,CAAC,EAAE,MAAM,cAAc;AAC3C,QAAI,OAAO;AACT,qBAAe,KAAK,EAAE,OAAO,MAAM,CAAC,EAAE,QAAQ,SAAS,EAAE,CAAC;AAAA,IAC5D;AAAA,EACF;AAGA,MAAI,eAAe,WAAW,GAAG;AAC/B,WAAO,SAAS,MAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO,OAAO;AAAA,EACpE;AAGA,QAAM,SAAmB,CAAC;AAC1B,WAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK;AAC9C,UAAM,QAAQ,eAAe,CAAC,EAAE;AAChC,UAAM,MAAM,IAAI,IAAI,eAAe,SAAS,eAAe,IAAI,CAAC,EAAE,UAAU,MAAM;AAClF,WAAO,KAAK,MAAM,MAAM,OAAO,GAAG,EAAE,KAAK,IAAI,EAAE,KAAK,CAAC;AAAA,EACvD;AAEA,SAAO,OAAO,OAAO,OAAO;AAC9B;AAEO,SAAS,mBAAmB,MAAsB;AACvD,SAAO,KAAK,YAAY,EAAE,QAAQ,QAAQ,GAAG,EAAE,KAAK;AACtD;AAEA,SAAS,UAAU,MAAsB;AACvC,SAAO,WAAW,QAAQ,EAAE,OAAO,mBAAmB,IAAI,CAAC,EAAE,OAAO,KAAK;AAC3E;AAYA,MAAM,uBAAuB;AAC7B,MAAM,oBAAoB;AAC1B,MAAM,qBAAqB;AAC3B,MAAM,aAAa;AAEnB,SAAS,SAAS,MAAsB;AACtC,SAAO,WAAW,MAAM,EAAE,OAAO,KAAK,KAAK,EAAE,YAAY,CAAC,EAAE,OAAO,KAAK;AAC1E;AAEO,SAAS,6BAA6B,OAAiC;AAC5E,MAAI,MAAM,SAAS,WAAY,QAAO;AACtC,QAAM,WAAW,MAAM,IAAI,CAAC,MAAM,EAAE,SAAS,MAAM,IAAI,CAAC;AAExD,QAAM,eAAe,oBAAI,IAAoB;AAC7C,QAAM,gBAAgB,oBAAI,IAAoB;AAC9C,aAAW,SAAS,UAAU;AAC5B,UAAM,QAAQ,oBAAI,IAAY;AAC9B,aAAS,IAAI,GAAG,IAAI,KAAK,IAAI,mBAAmB,MAAM,MAAM,GAAG,KAAK;AAClE,YAAM,IAAI,SAAS,MAAM,CAAC,CAAC;AAC3B,UAAI,CAAC,MAAM,IAAI,CAAC,GAAG;AACjB,cAAM,IAAI,CAAC;AACX,qBAAa,IAAI,IAAI,aAAa,IAAI,CAAC,KAAK,KAAK,CAAC;AAAA,MACpD;AAAA,IACF;AACA,UAAM,QAAQ,oBAAI,IAAY;AAC9B,aAAS,IAAI,MAAM,SAAS,GAAG,KAAK,KAAK,IAAI,MAAM,SAAS,oBAAoB,CAAC,GAAG,KAAK;AACvF,YAAM,IAAI,SAAS,MAAM,CAAC,CAAC;AAC3B,UAAI,CAAC,MAAM,IAAI,CAAC,GAAG;AACjB,cAAM,IAAI,CAAC;AACX,sBAAc,IAAI,IAAI,cAAc,IAAI,CAAC,KAAK,KAAK,CAAC;AAAA,MACtD;AAAA,IACF;AAAA,EACF;AAEA,QAAM,YAAY,MAAM,SAAS;AACjC,QAAM,aAAa,IAAI,IAAI,CAAC,GAAG,YAAY,EAAE,OAAO,CAAC,CAAC,EAAE,CAAC,MAAM,KAAK,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AAC9F,QAAM,cAAc,IAAI,IAAI,CAAC,GAAG,aAA
a,EAAE,OAAO,CAAC,CAAC,EAAE,CAAC,MAAM,KAAK,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AAEhG,SAAO,MAAM,IAAI,CAAC,MAAM,MAAM;AAC5B,UAAM,QAAQ,SAAS,CAAC;AACxB,QAAI,OAAO;AACX,WAAO,OAAO,MAAM,WAAW,MAAM,IAAI,EAAE,KAAK,MAAM,MAAM,WAAW,IAAI,SAAS,MAAM,IAAI,CAAC,CAAC,GAAI;AACpG,QAAI,OAAO,MAAM;AACjB,WAAO,OAAO,SAAS,MAAM,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM,YAAY,IAAI,SAAS,MAAM,OAAO,CAAC,CAAC,CAAC,GAAI;AACrG,WAAO,EAAE,KAAK,KAAK,KAAK,UAAU,MAAM,MAAM,MAAM,IAAI,EAAE,KAAK,IAAI,EAAE;AAAA,EACvE,CAAC;AACH;AAEO,SAAS,iBAAiB,OAAoB,QAA+B;AAClF,MAAI,MAAM,UAAU,EAAG,QAAO,MAAM,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,UAAU,EAAE,SAAS,EAAE;AAErF,QAAM,WAAW,6BAA6B,KAAK;AAGnD,QAAM,eAAe,SAAS,qBAAqB,MAAM,IAAI,CAAC;AAC9D,QAAM,oBAAoB,IAAI,IAAY,YAAY;AAGtD,QAAM,aAAa,SAAS,IAAI,CAAC,UAAU;AAAA,IACzC,KAAK,KAAK;AAAA,IACV,QAAQ,gBAAgB,KAAK,QAAQ;AAAA,EACvC,EAAE;AAGF,QAAM,gBAAgB,oBAAI,IAAoB;AAC9C,aAAW,QAAQ,YAAY;AAC7B,UAAM,aAAa,oBAAI,IAAY;AACnC,eAAW,SAAS,KAAK,QAAQ;AAC/B,YAAM,IAAI,UAAU,KAAK;AACzB,UAAI,CAAC,WAAW,IAAI,CAAC,GAAG;AACtB,mBAAW,IAAI,CAAC;AAChB,sBAAc,IAAI,IAAI,cAAc,IAAI,CAAC,KAAK,KAAK,CAAC;AAAA,MACtD;AAAA,IACF;AAAA,EACF;AAGA,QAAM,YAAY,MAAM,SAAS;AACjC,aAAW,CAAC,MAAM,KAAK,KAAK,eAAe;AACzC,QAAI,QAAQ,WAAW;AACrB,wBAAkB,IAAI,IAAI;AAAA,IAC5B;AAAA,EACF;AAGA,MAAI,QAAQ;AACV,qBAAiB,QAAQ,MAAM,KAAK,iBAAiB,CAAC;AAAA,EACxD;AAGA,SAAO,WAAW,IAAI,CAAC,SAAS;AAC9B,UAAM,WAAW,KAAK,OAAO,OAAO,CAAC,UAAU,CAAC,kBAAkB,IAAI,UAAU,KAAK,CAAC,CAAC;AACvF,WAAO;AAAA,MACL,KAAK,KAAK;AAAA,MACV,UAAU,SAAS,KAAK,MAAM;AAAA,IAChC;AAAA,EACF,CAAC;AACH;AAEO,SAAS,qBAAqB,QAA0B;AAC7D,QAAM,KAAK,YAAY;AACvB,QAAM,OAAO,GAAG,QAAQ,4DAA4D,EAAE,IAAI,MAAM;AAChG,SAAO,KAAK,IAAI,OAAK,EAAE,UAAU;AACnC;AAEO,SAAS,iBAAiB,QAAgB,QAAwB;AACvE,QAAM,KAAK,YAAY;AACvB,QAAM,MAAM,GAAG,QAAQ,iDAAiD;AACxE,QAAM,SAAS,GAAG;AAAA,IAChB;AAAA,EACF;AACA,QAAM,KAAK,GAAG,YAAY,CAAC,UAAoB;AAC7C,QAAI,IAAI,MAAM;AACd,eAAW,QAAQ,OAAO;AACxB,aAAO,IAAI,QAAQ,MAAM,IAAI;AAAA,IAC/B;AAAA,EACF,CAAC;AACD,KAAG,MAAM;AACX;","names":[]}
@@ -0,0 +1,43 @@
1
+ import type { RawFetchResult } from '../types.js';
2
+ export interface CachedFetchHeaders {
3
+ etag?: string;
4
+ lastModified?: string;
5
+ fetchedAt: string;
6
+ }
7
+ /**
8
+ * Look up cached ETag + Last-Modified for a URL. Returns null when no row
9
+ * exists (first crawl, or table missing).
10
+ */
11
+ export declare function getCachedHeaders(url: string): CachedFetchHeaders | null;
12
+ /**
13
+ * Persist ETag + Last-Modified from a fresh fetch. Headers are looked up
14
+ * case-insensitively so both `ETag` and `etag` are accepted.
15
+ */
16
+ export declare function saveFetchHeaders(url: string, headers: Record<string, string>): void;
17
+ /**
18
+ * Update only fetched_at on the existing row (no header change).
19
+ */
20
+ export declare function markFetchedNotModified(url: string): void;
21
+ export interface ConditionalFetchOptions {
22
+ conditionalHeaders?: {
23
+ ifNoneMatch?: string;
24
+ ifModifiedSince?: string;
25
+ };
26
+ }
27
+ /**
28
+ * Conditional-fetch wrapper. The wrapped `rawFetchFn` accepts an options
29
+ * object so this layer can inject `If-None-Match` / `If-Modified-Since` from
30
+ * the cached crawl_etags row. When the server replies 304 the network
31
+ * payload is empty and the caller gets `notModified: true` for free; when
32
+ * the server returns 200 + the same ETag/Last-Modified (legacy fallback)
33
+ * the wrapper still detects the no-change case.
34
+ */
35
+ export declare function conditionalFetch(url: string, rawFetchFn: (url: string, opts?: ConditionalFetchOptions) => Promise<RawFetchResult>): Promise<RawFetchResult & {
36
+ notModified?: boolean;
37
+ }>;
38
+ /**
39
+ * Test helper: wipe the crawl_etags table. Safe to call when the DB is
40
+ * uninitialised — it just no-ops.
41
+ */
42
+ export declare function _clearEtagCacheForTest(): void;
43
+ //# sourceMappingURL=etag-incremental.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"etag-incremental.d.ts","sourceRoot":"","sources":["../../src/crawl/etag-incremental.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAIlD,MAAM,WAAW,kBAAkB;IACjC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAevE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAsBnF;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAQxD;AAED,MAAM,WAAW,uBAAuB;IACtC,kBAAkB,CAAC,EAAE;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,eAAe,CAAC,EAAE,MAAM,CAAC;KAC1B,CAAC;CACH;AAED;;;;;;;GAOG;AACH,wBAAsB,gBAAgB,CACpC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,uBAAuB,KAAK,OAAO,CAAC,cAAc,CAAC,GACnF,OAAO,CAAC,cAAc,GAAG;IAAE,WAAW,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,CA+CrD;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,IAAI,IAAI,CAO7C"}
@@ -0,0 +1,94 @@
1
+ import { getDatabase } from "../cache/db.js";
2
+ import { createLogger } from "../logger.js";
3
+ const log = createLogger("crawl");
4
+ function getCachedHeaders(url) {
5
+ try {
6
+ const db = getDatabase();
7
+ const row = db.prepare(
8
+ "SELECT etag, last_modified, fetched_at FROM crawl_etags WHERE url = ?"
9
+ ).get(url);
10
+ if (!row) return null;
11
+ const out = { fetchedAt: row.fetched_at };
12
+ if (row.etag) out.etag = row.etag;
13
+ if (row.last_modified) out.lastModified = row.last_modified;
14
+ return out;
15
+ } catch (err) {
16
+ log.debug("getCachedHeaders failed", { url, error: String(err) });
17
+ return null;
18
+ }
19
+ }
20
+ function saveFetchHeaders(url, headers) {
21
+ try {
22
+ const lower = {};
23
+ for (const [k, v] of Object.entries(headers)) lower[k.toLowerCase()] = v;
24
+ const etag = lower["etag"] ?? null;
25
+ const lastModified = lower["last-modified"] ?? null;
26
+ const origin = new URL(url).origin;
27
+ const fetchedAt = (/* @__PURE__ */ new Date()).toISOString();
28
+ const db = getDatabase();
29
+ db.prepare(`
30
+ INSERT INTO crawl_etags (url, origin, etag, last_modified, fetched_at)
31
+ VALUES (?, ?, ?, ?, ?)
32
+ ON CONFLICT(url) DO UPDATE SET
33
+ origin = excluded.origin,
34
+ etag = excluded.etag,
35
+ last_modified = excluded.last_modified,
36
+ fetched_at = excluded.fetched_at
37
+ `).run(url, origin, etag, lastModified, fetchedAt);
38
+ } catch (err) {
39
+ log.debug("saveFetchHeaders failed", { url, error: String(err) });
40
+ }
41
+ }
42
+ function markFetchedNotModified(url) {
43
+ try {
44
+ const db = getDatabase();
45
+ db.prepare("UPDATE crawl_etags SET fetched_at = ? WHERE url = ?").run((/* @__PURE__ */ new Date()).toISOString(), url);
46
+ } catch (err) {
47
+ log.debug("markFetchedNotModified failed", { url, error: String(err) });
48
+ }
49
+ }
50
+ async function conditionalFetch(url, rawFetchFn) {
51
+ const cached = getCachedHeaders(url);
52
+ const conditionalHeaders = {};
53
+ if (cached?.etag) conditionalHeaders.ifNoneMatch = cached.etag;
54
+ if (cached?.lastModified) conditionalHeaders.ifModifiedSince = cached.lastModified;
55
+ const opts = conditionalHeaders.ifNoneMatch || conditionalHeaders.ifModifiedSince ? { conditionalHeaders } : void 0;
56
+ const result = await rawFetchFn(url, opts);
57
+ if (result.statusCode === 304) {
58
+ markFetchedNotModified(url);
59
+ return { ...result, notModified: true };
60
+ }
61
+ const respHeaders = {};
62
+ for (const [k, v] of Object.entries(result.headers ?? {})) respHeaders[k.toLowerCase()] = v;
63
+ const respEtag = respHeaders["etag"];
64
+ const respLastModified = respHeaders["last-modified"];
65
+ let notModified = false;
66
+ if (cached) {
67
+ if (cached.etag && respEtag && cached.etag === respEtag) notModified = true;
68
+ else if (!respEtag && cached.lastModified && respLastModified && cached.lastModified === respLastModified) notModified = true;
69
+ }
70
+ if (respEtag || respLastModified) {
71
+ saveFetchHeaders(url, result.headers ?? {});
72
+ } else if (cached) {
73
+ markFetchedNotModified(url);
74
+ } else {
75
+ saveFetchHeaders(url, result.headers ?? {});
76
+ }
77
+ if (notModified) return { ...result, notModified };
78
+ return result;
79
+ }
80
+ function _clearEtagCacheForTest() {
81
+ try {
82
+ const db = getDatabase();
83
+ db.exec("DELETE FROM crawl_etags");
84
+ } catch {
85
+ }
86
+ }
87
+ export {
88
+ _clearEtagCacheForTest,
89
+ conditionalFetch,
90
+ getCachedHeaders,
91
+ markFetchedNotModified,
92
+ saveFetchHeaders
93
+ };
94
+ //# sourceMappingURL=etag-incremental.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/crawl/etag-incremental.ts"],"sourcesContent":["import { getDatabase } from '../cache/db.js';\nimport { createLogger } from '../logger.js';\nimport type { RawFetchResult } from '../types.js';\n\nconst log = createLogger('crawl');\n\nexport interface CachedFetchHeaders {\n etag?: string;\n lastModified?: string;\n fetchedAt: string;\n}\n\n/**\n * Look up cached ETag + Last-Modified for a URL. Returns null when no row\n * exists (first crawl, or table missing).\n */\nexport function getCachedHeaders(url: string): CachedFetchHeaders | null {\n try {\n const db = getDatabase();\n const row = db.prepare(\n 'SELECT etag, last_modified, fetched_at FROM crawl_etags WHERE url = ?',\n ).get(url) as { etag: string | null; last_modified: string | null; fetched_at: string } | undefined;\n if (!row) return null;\n const out: CachedFetchHeaders = { fetchedAt: row.fetched_at };\n if (row.etag) out.etag = row.etag;\n if (row.last_modified) out.lastModified = row.last_modified;\n return out;\n } catch (err) {\n log.debug('getCachedHeaders failed', { url, error: String(err) });\n return null;\n }\n}\n\n/**\n * Persist ETag + Last-Modified from a fresh fetch. Headers are looked up\n * case-insensitively so both `ETag` and `etag` are accepted.\n */\nexport function saveFetchHeaders(url: string, headers: Record<string, string>): void {\n try {\n const lower: Record<string, string> = {};\n for (const [k, v] of Object.entries(headers)) lower[k.toLowerCase()] = v;\n const etag = lower['etag'] ?? null;\n const lastModified = lower['last-modified'] ?? 
null;\n const origin = new URL(url).origin;\n const fetchedAt = new Date().toISOString();\n\n const db = getDatabase();\n db.prepare(`\n INSERT INTO crawl_etags (url, origin, etag, last_modified, fetched_at)\n VALUES (?, ?, ?, ?, ?)\n ON CONFLICT(url) DO UPDATE SET\n origin = excluded.origin,\n etag = excluded.etag,\n last_modified = excluded.last_modified,\n fetched_at = excluded.fetched_at\n `).run(url, origin, etag, lastModified, fetchedAt);\n } catch (err) {\n log.debug('saveFetchHeaders failed', { url, error: String(err) });\n }\n}\n\n/**\n * Update only fetched_at on the existing row (no header change).\n */\nexport function markFetchedNotModified(url: string): void {\n try {\n const db = getDatabase();\n db.prepare('UPDATE crawl_etags SET fetched_at = ? WHERE url = ?')\n .run(new Date().toISOString(), url);\n } catch (err) {\n log.debug('markFetchedNotModified failed', { url, error: String(err) });\n }\n}\n\nexport interface ConditionalFetchOptions {\n conditionalHeaders?: {\n ifNoneMatch?: string;\n ifModifiedSince?: string;\n };\n}\n\n/**\n * Conditional-fetch wrapper. The wrapped `rawFetchFn` accepts an options\n * object so this layer can inject `If-None-Match` / `If-Modified-Since` from\n * the cached crawl_etags row. 
When the server replies 304 the network\n * payload is empty and the caller gets `notModified: true` for free; when\n * the server returns 200 + the same ETag/Last-Modified (legacy fallback)\n * the wrapper still detects the no-change case.\n */\nexport async function conditionalFetch(\n url: string,\n rawFetchFn: (url: string, opts?: ConditionalFetchOptions) => Promise<RawFetchResult>,\n): Promise<RawFetchResult & { notModified?: boolean }> {\n const cached = getCachedHeaders(url);\n\n const conditionalHeaders: ConditionalFetchOptions['conditionalHeaders'] = {};\n if (cached?.etag) conditionalHeaders.ifNoneMatch = cached.etag;\n if (cached?.lastModified) conditionalHeaders.ifModifiedSince = cached.lastModified;\n\n const opts: ConditionalFetchOptions | undefined =\n conditionalHeaders.ifNoneMatch || conditionalHeaders.ifModifiedSince\n ? { conditionalHeaders }\n : undefined;\n\n const result = await rawFetchFn(url, opts);\n\n // Server honoured the conditional GET — true short-circuit.\n if (result.statusCode === 304) {\n markFetchedNotModified(url);\n return { ...result, notModified: true };\n }\n\n const respHeaders: Record<string, string> = {};\n for (const [k, v] of Object.entries(result.headers ?? {})) respHeaders[k.toLowerCase()] = v;\n const respEtag = respHeaders['etag'];\n const respLastModified = respHeaders['last-modified'];\n\n // 200 + unchanged validators (server didn't honour If-None-Match).\n let notModified = false;\n if (cached) {\n if (cached.etag && respEtag && cached.etag === respEtag) notModified = true;\n else if (\n !respEtag &&\n cached.lastModified &&\n respLastModified &&\n cached.lastModified === respLastModified\n ) notModified = true;\n }\n\n if (respEtag || respLastModified) {\n saveFetchHeaders(url, result.headers ?? {});\n } else if (cached) {\n markFetchedNotModified(url);\n } else {\n saveFetchHeaders(url, result.headers ?? 
{});\n }\n\n if (notModified) return { ...result, notModified };\n return result;\n}\n\n/**\n * Test helper: wipe the crawl_etags table. Safe to call when the DB is\n * uninitialised — it just no-ops.\n */\nexport function _clearEtagCacheForTest(): void {\n try {\n const db = getDatabase();\n db.exec('DELETE FROM crawl_etags');\n } catch {\n // DB not initialised — nothing to clear\n }\n}\n"],"mappings":"AAAA,SAAS,mBAAmB;AAC5B,SAAS,oBAAoB;AAG7B,MAAM,MAAM,aAAa,OAAO;AAYzB,SAAS,iBAAiB,KAAwC;AACvE,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,UAAM,MAAM,GAAG;AAAA,MACb;AAAA,IACF,EAAE,IAAI,GAAG;AACT,QAAI,CAAC,IAAK,QAAO;AACjB,UAAM,MAA0B,EAAE,WAAW,IAAI,WAAW;AAC5D,QAAI,IAAI,KAAM,KAAI,OAAO,IAAI;AAC7B,QAAI,IAAI,cAAe,KAAI,eAAe,IAAI;AAC9C,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,QAAI,MAAM,2BAA2B,EAAE,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAChE,WAAO;AAAA,EACT;AACF;AAMO,SAAS,iBAAiB,KAAa,SAAuC;AACnF,MAAI;AACF,UAAM,QAAgC,CAAC;AACvC,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,OAAO,EAAG,OAAM,EAAE,YAAY,CAAC,IAAI;AACvE,UAAM,OAAO,MAAM,MAAM,KAAK;AAC9B,UAAM,eAAe,MAAM,eAAe,KAAK;AAC/C,UAAM,SAAS,IAAI,IAAI,GAAG,EAAE;AAC5B,UAAM,aAAY,oBAAI,KAAK,GAAE,YAAY;AAEzC,UAAM,KAAK,YAAY;AACvB,OAAG,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,KAQV,EAAE,IAAI,KAAK,QAAQ,MAAM,cAAc,SAAS;AAAA,EACnD,SAAS,KAAK;AACZ,QAAI,MAAM,2BAA2B,EAAE,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,EAClE;AACF;AAKO,SAAS,uBAAuB,KAAmB;AACxD,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,OAAG,QAAQ,qDAAqD,EAC7D,KAAI,oBAAI,KAAK,GAAE,YAAY,GAAG,GAAG;AAAA,EACtC,SAAS,KAAK;AACZ,QAAI,MAAM,iCAAiC,EAAE,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,EACxE;AACF;AAiBA,eAAsB,iBACpB,KACA,YACqD;AACrD,QAAM,SAAS,iBAAiB,GAAG;AAEnC,QAAM,qBAAoE,CAAC;AAC3E,MAAI,QAAQ,KAAM,oBAAmB,cAAc,OAAO;AAC1D,MAAI,QAAQ,aAAc,oBAAmB,kBAAkB,OAAO;AAEtE,QAAM,OACJ,mBAAmB,eAAe,mBAAmB,kBACjD,EAAE,mBAAmB,IACrB;AAEN,QAAM,SAAS,MAAM,WAAW,KAAK,IAAI;AAGzC,MAAI,OAAO,eAAe,KAAK;AAC7B,2BAAuB,GAAG;AAC1B,WAAO,EAAE,GAAG,QAAQ,aAAa,KAAK;AAAA,EACxC;AAEA,QAAM,cAAsC,CAAC;AAC7C,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,OAAO,WAAW,CAAC,CAAC,EAAG,aAAY,EAAE,YAAY,CAAC,IAAI;AAC1F,QAAM,WAAW,YAAY
,MAAM;AACnC,QAAM,mBAAmB,YAAY,eAAe;AAGpD,MAAI,cAAc;AAClB,MAAI,QAAQ;AACV,QAAI,OAAO,QAAQ,YAAY,OAAO,SAAS,SAAU,eAAc;AAAA,aAErE,CAAC,YACD,OAAO,gBACP,oBACA,OAAO,iBAAiB,iBACxB,eAAc;AAAA,EAClB;AAEA,MAAI,YAAY,kBAAkB;AAChC,qBAAiB,KAAK,OAAO,WAAW,CAAC,CAAC;AAAA,EAC5C,WAAW,QAAQ;AACjB,2BAAuB,GAAG;AAAA,EAC5B,OAAO;AACL,qBAAiB,KAAK,OAAO,WAAW,CAAC,CAAC;AAAA,EAC5C;AAEA,MAAI,YAAa,QAAO,EAAE,GAAG,QAAQ,YAAY;AACjD,SAAO;AACT;AAMO,SAAS,yBAA+B;AAC7C,MAAI;AACF,UAAM,KAAK,YAAY;AACvB,OAAG,KAAK,yBAAyB;AAAA,EACnC,QAAQ;AAAA,EAER;AACF;","names":[]}
@@ -0,0 +1,10 @@
1
+ import type { CrawlResultItem } from '../types.js';
2
+ export declare function isIndexingEnabled(): boolean;
3
+ /**
4
+ * Opt-in: embed (title + first 500 chars of markdown) and upsert into the
5
+ * vector store. Errors are logged at debug and swallowed so a misbehaving
6
+ * embed provider can never break a crawl. Disabled by default — gate via
7
+ * WIGOLO_CRAWL_INDEX=1.
8
+ */
9
+ export declare function indexCrawlResult(item: CrawlResultItem): Promise<void>;
10
+ //# sourceMappingURL=index-to-vec.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index-to-vec.d.ts","sourceRoot":"","sources":["../../src/crawl/index-to-vec.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAOnD,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED;;;;;GAKG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAgC3E"}
@@ -0,0 +1,44 @@
1
+ import { createHash } from "crypto";
2
+ import { createLogger } from "../logger.js";
3
+ import { getVectorStore } from "../providers/vector-store.js";
4
+ import { getEmbedProvider } from "../providers/embed-provider.js";
5
+ const log = createLogger("crawl");
6
+ const SUMMARY_CHARS = 500;
7
+ const MIN_TEXT_LEN = 20;
8
+ function isIndexingEnabled() {
9
+ return process.env.WIGOLO_CRAWL_INDEX === "1";
10
+ }
11
+ async function indexCrawlResult(item) {
12
+ try {
13
+ const summary = (item.markdown ?? "").slice(0, SUMMARY_CHARS);
14
+ const text = `${item.title ?? ""}
15
+ ${summary}`.trim();
16
+ if (text.length < MIN_TEXT_LEN) return;
17
+ const provider = await getEmbedProvider();
18
+ const vectors = await provider.embed([text]);
19
+ if (vectors.length === 0) return;
20
+ const store = await getVectorStore();
21
+ const contentHash = createHash("sha256").update(item.markdown ?? "").digest("hex");
22
+ await store.upsert([
23
+ {
24
+ id: item.url,
25
+ vector: vectors[0],
26
+ metadata: {
27
+ url: item.url,
28
+ contentHash,
29
+ modelId: provider.modelId
30
+ }
31
+ }
32
+ ]);
33
+ } catch (err) {
34
+ log.warn("crawl index-to-vec failed", {
35
+ url: item.url,
36
+ error: err instanceof Error ? err.message : String(err)
37
+ });
38
+ }
39
+ }
40
+ export {
41
+ indexCrawlResult,
42
+ isIndexingEnabled
43
+ };
44
+ //# sourceMappingURL=index-to-vec.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/crawl/index-to-vec.ts"],"sourcesContent":["import { createHash } from 'crypto';\nimport { createLogger } from '../logger.js';\nimport { getVectorStore } from '../providers/vector-store.js';\nimport { getEmbedProvider } from '../providers/embed-provider.js';\nimport type { CrawlResultItem } from '../types.js';\n\nconst log = createLogger('crawl');\n\nconst SUMMARY_CHARS = 500;\nconst MIN_TEXT_LEN = 20;\n\nexport function isIndexingEnabled(): boolean {\n return process.env.WIGOLO_CRAWL_INDEX === '1';\n}\n\n/**\n * Opt-in: embed (title + first 500 chars of markdown) and upsert into the\n * vector store. Errors are logged at debug and swallowed so a misbehaving\n * embed provider can never break a crawl. Disabled by default — gate via\n * WIGOLO_CRAWL_INDEX=1.\n */\nexport async function indexCrawlResult(item: CrawlResultItem): Promise<void> {\n try {\n const summary = (item.markdown ?? '').slice(0, SUMMARY_CHARS);\n const text = `${item.title ?? ''}\\n${summary}`.trim();\n if (text.length < MIN_TEXT_LEN) return;\n\n const provider = await getEmbedProvider();\n const vectors = await provider.embed([text]);\n if (vectors.length === 0) return;\n\n const store = await getVectorStore();\n const contentHash = createHash('sha256')\n .update(item.markdown ?? '')\n .digest('hex');\n\n await store.upsert([\n {\n id: item.url,\n vector: vectors[0],\n metadata: {\n url: item.url,\n contentHash,\n modelId: provider.modelId,\n },\n },\n ]);\n } catch (err) {\n log.warn('crawl index-to-vec failed', {\n url: item.url,\n error: err instanceof Error ? 
err.message : String(err),\n });\n }\n}\n"],"mappings":"AAAA,SAAS,kBAAkB;AAC3B,SAAS,oBAAoB;AAC7B,SAAS,sBAAsB;AAC/B,SAAS,wBAAwB;AAGjC,MAAM,MAAM,aAAa,OAAO;AAEhC,MAAM,gBAAgB;AACtB,MAAM,eAAe;AAEd,SAAS,oBAA6B;AAC3C,SAAO,QAAQ,IAAI,uBAAuB;AAC5C;AAQA,eAAsB,iBAAiB,MAAsC;AAC3E,MAAI;AACF,UAAM,WAAW,KAAK,YAAY,IAAI,MAAM,GAAG,aAAa;AAC5D,UAAM,OAAO,GAAG,KAAK,SAAS,EAAE;AAAA,EAAK,OAAO,GAAG,KAAK;AACpD,QAAI,KAAK,SAAS,aAAc;AAEhC,UAAM,WAAW,MAAM,iBAAiB;AACxC,UAAM,UAAU,MAAM,SAAS,MAAM,CAAC,IAAI,CAAC;AAC3C,QAAI,QAAQ,WAAW,EAAG;AAE1B,UAAM,QAAQ,MAAM,eAAe;AACnC,UAAM,cAAc,WAAW,QAAQ,EACpC,OAAO,KAAK,YAAY,EAAE,EAC1B,OAAO,KAAK;AAEf,UAAM,MAAM,OAAO;AAAA,MACjB;AAAA,QACE,IAAI,KAAK;AAAA,QACT,QAAQ,QAAQ,CAAC;AAAA,QACjB,UAAU;AAAA,UACR,KAAK,KAAK;AAAA,UACV;AAAA,UACA,SAAS,SAAS;AAAA,QACpB;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH,SAAS,KAAK;AACZ,QAAI,KAAK,6BAA6B;AAAA,MACpC,KAAK,KAAK;AAAA,MACV,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AAAA,EACH;AACF;","names":[]}