@staticn0va/wigolo 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (982)
  1. package/LICENSE +1 -1
  2. package/README.md +195 -73
  3. package/SKILL.md +382 -0
  4. package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
  5. package/assets/blocks/claude-code/wigolo-command.md +40 -0
  6. package/assets/blocks/cursor/wigolo.mdc +46 -0
  7. package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
  8. package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
  9. package/assets/skills/wigolo/SKILL.md +50 -0
  10. package/assets/skills/wigolo/rules/cache-first.md +30 -0
  11. package/assets/skills/wigolo/rules/synthesis.md +43 -0
  12. package/assets/skills/wigolo-agent/SKILL.md +73 -0
  13. package/assets/skills/wigolo-crawl/SKILL.md +60 -0
  14. package/assets/skills/wigolo-extract/SKILL.md +59 -0
  15. package/assets/skills/wigolo-fetch/SKILL.md +65 -0
  16. package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
  17. package/assets/skills/wigolo-research/SKILL.md +77 -0
  18. package/assets/skills/wigolo-search/SKILL.md +78 -0
  19. package/dist/agent/executor.d.ts +33 -0
  20. package/dist/agent/executor.d.ts.map +1 -0
  21. package/dist/agent/executor.js +233 -0
  22. package/dist/agent/executor.js.map +1 -0
  23. package/dist/agent/pipeline.d.ts +5 -0
  24. package/dist/agent/pipeline.d.ts.map +1 -0
  25. package/dist/agent/pipeline.js +208 -0
  26. package/dist/agent/pipeline.js.map +1 -0
  27. package/dist/agent/planner.d.ts +13 -0
  28. package/dist/agent/planner.d.ts.map +1 -0
  29. package/dist/agent/planner.js +271 -0
  30. package/dist/agent/planner.js.map +1 -0
  31. package/dist/agent/relevance.d.ts +15 -0
  32. package/dist/agent/relevance.d.ts.map +1 -0
  33. package/dist/agent/relevance.js +60 -0
  34. package/dist/agent/relevance.js.map +1 -0
  35. package/dist/cache/backfill-embeddings.d.ts +23 -0
  36. package/dist/cache/backfill-embeddings.d.ts.map +1 -0
  37. package/dist/cache/backfill-embeddings.js +105 -0
  38. package/dist/cache/backfill-embeddings.js.map +1 -0
  39. package/dist/cache/change-detector.d.ts +7 -0
  40. package/dist/cache/change-detector.d.ts.map +1 -0
  41. package/dist/cache/change-detector.js +43 -0
  42. package/dist/cache/change-detector.js.map +1 -0
  43. package/dist/cache/db.d.ts +1 -0
  44. package/dist/cache/db.d.ts.map +1 -1
  45. package/dist/cache/db.js +94 -22
  46. package/dist/cache/db.js.map +1 -1
  47. package/dist/cache/diff-summary.d.ts +2 -0
  48. package/dist/cache/diff-summary.d.ts.map +1 -0
  49. package/dist/cache/diff-summary.js +82 -0
  50. package/dist/cache/diff-summary.js.map +1 -0
  51. package/dist/cache/migrations/runner.d.ts +29 -0
  52. package/dist/cache/migrations/runner.d.ts.map +1 -0
  53. package/dist/cache/migrations/runner.js +147 -0
  54. package/dist/cache/migrations/runner.js.map +1 -0
  55. package/dist/cache/sqlite-vec-store.d.ts +42 -0
  56. package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
  57. package/dist/cache/sqlite-vec-store.js +176 -0
  58. package/dist/cache/sqlite-vec-store.js.map +1 -0
  59. package/dist/cache/store.d.ts +46 -1
  60. package/dist/cache/store.d.ts.map +1 -1
  61. package/dist/cache/store.js +362 -168
  62. package/dist/cache/store.js.map +1 -1
  63. package/dist/cli/agents/antigravity.d.ts +20 -0
  64. package/dist/cli/agents/antigravity.d.ts.map +1 -0
  65. package/dist/cli/agents/antigravity.js +49 -0
  66. package/dist/cli/agents/antigravity.js.map +1 -0
  67. package/dist/cli/agents/claude-code.d.ts +25 -0
  68. package/dist/cli/agents/claude-code.d.ts.map +1 -0
  69. package/dist/cli/agents/claude-code.js +111 -0
  70. package/dist/cli/agents/claude-code.js.map +1 -0
  71. package/dist/cli/agents/cursor.d.ts +21 -0
  72. package/dist/cli/agents/cursor.d.ts.map +1 -0
  73. package/dist/cli/agents/cursor.js +58 -0
  74. package/dist/cli/agents/cursor.js.map +1 -0
  75. package/dist/cli/agents/gemini-cli.d.ts +21 -0
  76. package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
  77. package/dist/cli/agents/gemini-cli.js +55 -0
  78. package/dist/cli/agents/gemini-cli.js.map +1 -0
  79. package/dist/cli/agents/registry.d.ts +21 -0
  80. package/dist/cli/agents/registry.d.ts.map +1 -0
  81. package/dist/cli/agents/registry.js +27 -0
  82. package/dist/cli/agents/registry.js.map +1 -0
  83. package/dist/cli/agents/utils.d.ts +26 -0
  84. package/dist/cli/agents/utils.d.ts.map +1 -0
  85. package/dist/cli/agents/utils.js +136 -0
  86. package/dist/cli/agents/utils.js.map +1 -0
  87. package/dist/cli/agents/vscode.d.ts +21 -0
  88. package/dist/cli/agents/vscode.d.ts.map +1 -0
  89. package/dist/cli/agents/vscode.js +62 -0
  90. package/dist/cli/agents/vscode.js.map +1 -0
  91. package/dist/cli/auth.d.ts +2 -0
  92. package/dist/cli/auth.d.ts.map +1 -0
  93. package/dist/cli/auth.js +94 -0
  94. package/dist/cli/auth.js.map +1 -0
  95. package/dist/cli/backfill.d.ts +2 -0
  96. package/dist/cli/backfill.d.ts.map +1 -0
  97. package/dist/cli/backfill.js +58 -0
  98. package/dist/cli/backfill.js.map +1 -0
  99. package/dist/cli/daemon.d.ts +6 -1
  100. package/dist/cli/daemon.d.ts.map +1 -1
  101. package/dist/cli/daemon.js +61 -3
  102. package/dist/cli/daemon.js.map +1 -1
  103. package/dist/cli/doctor.d.ts +8 -0
  104. package/dist/cli/doctor.d.ts.map +1 -0
  105. package/dist/cli/doctor.js +318 -0
  106. package/dist/cli/doctor.js.map +1 -0
  107. package/dist/cli/health.d.ts +1 -1
  108. package/dist/cli/health.d.ts.map +1 -1
  109. package/dist/cli/health.js +42 -3
  110. package/dist/cli/health.js.map +1 -1
  111. package/dist/cli/help.d.ts +6 -0
  112. package/dist/cli/help.d.ts.map +1 -0
  113. package/dist/cli/help.js +63 -0
  114. package/dist/cli/help.js.map +1 -0
  115. package/dist/cli/index.d.ts +1 -1
  116. package/dist/cli/index.d.ts.map +1 -1
  117. package/dist/cli/index.js +35 -7
  118. package/dist/cli/index.js.map +1 -1
  119. package/dist/cli/init.d.ts +2 -0
  120. package/dist/cli/init.d.ts.map +1 -0
  121. package/dist/cli/init.js +201 -0
  122. package/dist/cli/init.js.map +1 -0
  123. package/dist/cli/plugin.d.ts +5 -0
  124. package/dist/cli/plugin.d.ts.map +1 -0
  125. package/dist/cli/plugin.js +185 -0
  126. package/dist/cli/plugin.js.map +1 -0
  127. package/dist/cli/setup-mcp.d.ts +2 -0
  128. package/dist/cli/setup-mcp.d.ts.map +1 -0
  129. package/dist/cli/setup-mcp.js +114 -0
  130. package/dist/cli/setup-mcp.js.map +1 -0
  131. package/dist/cli/shell.d.ts +2 -0
  132. package/dist/cli/shell.d.ts.map +1 -0
  133. package/dist/cli/shell.js +86 -0
  134. package/dist/cli/shell.js.map +1 -0
  135. package/dist/cli/status.d.ts +2 -0
  136. package/dist/cli/status.d.ts.map +1 -0
  137. package/dist/cli/status.js +31 -0
  138. package/dist/cli/status.js.map +1 -0
  139. package/dist/cli/telemetry.d.ts +10 -0
  140. package/dist/cli/telemetry.d.ts.map +1 -0
  141. package/dist/cli/telemetry.js +56 -0
  142. package/dist/cli/telemetry.js.map +1 -0
  143. package/dist/cli/tui/agents-types.d.ts +28 -0
  144. package/dist/cli/tui/agents-types.d.ts.map +1 -0
  145. package/dist/cli/tui/agents-types.js +1 -0
  146. package/dist/cli/tui/agents-types.js.map +1 -0
  147. package/dist/cli/tui/agents.d.ts +11 -0
  148. package/dist/cli/tui/agents.d.ts.map +1 -0
  149. package/dist/cli/tui/agents.js +93 -0
  150. package/dist/cli/tui/agents.js.map +1 -0
  151. package/dist/cli/tui/banner.d.ts +3 -0
  152. package/dist/cli/tui/banner.d.ts.map +1 -0
  153. package/dist/cli/tui/banner.js +30 -0
  154. package/dist/cli/tui/banner.js.map +1 -0
  155. package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
  156. package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
  157. package/dist/cli/tui/components/AgentSelect.js +116 -0
  158. package/dist/cli/tui/components/AgentSelect.js.map +1 -0
  159. package/dist/cli/tui/components/Banner.d.ts +6 -0
  160. package/dist/cli/tui/components/Banner.d.ts.map +1 -0
  161. package/dist/cli/tui/components/Banner.js +25 -0
  162. package/dist/cli/tui/components/Banner.js.map +1 -0
  163. package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
  164. package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
  165. package/dist/cli/tui/components/BrowserSelect.js +19 -0
  166. package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
  167. package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
  168. package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
  169. package/dist/cli/tui/components/InstallProgress.js +67 -0
  170. package/dist/cli/tui/components/InstallProgress.js.map +1 -0
  171. package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
  172. package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
  173. package/dist/cli/tui/components/SkillInstall.js +94 -0
  174. package/dist/cli/tui/components/SkillInstall.js.map +1 -0
  175. package/dist/cli/tui/components/Summary.d.ts +22 -0
  176. package/dist/cli/tui/components/Summary.d.ts.map +1 -0
  177. package/dist/cli/tui/components/Summary.js +135 -0
  178. package/dist/cli/tui/components/Summary.js.map +1 -0
  179. package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
  180. package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
  181. package/dist/cli/tui/components/SystemCheck.js +71 -0
  182. package/dist/cli/tui/components/SystemCheck.js.map +1 -0
  183. package/dist/cli/tui/components/Verification.d.ts +8 -0
  184. package/dist/cli/tui/components/Verification.d.ts.map +1 -0
  185. package/dist/cli/tui/components/Verification.js +63 -0
  186. package/dist/cli/tui/components/Verification.js.map +1 -0
  187. package/dist/cli/tui/config-writer-cli.d.ts +12 -0
  188. package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
  189. package/dist/cli/tui/config-writer-cli.js +39 -0
  190. package/dist/cli/tui/config-writer-cli.js.map +1 -0
  191. package/dist/cli/tui/config-writer-json.d.ts +16 -0
  192. package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
  193. package/dist/cli/tui/config-writer-json.js +86 -0
  194. package/dist/cli/tui/config-writer-json.js.map +1 -0
  195. package/dist/cli/tui/config-writer-toml.d.ts +16 -0
  196. package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
  197. package/dist/cli/tui/config-writer-toml.js +83 -0
  198. package/dist/cli/tui/config-writer-toml.js.map +1 -0
  199. package/dist/cli/tui/config-writer.d.ts +25 -0
  200. package/dist/cli/tui/config-writer.d.ts.map +1 -0
  201. package/dist/cli/tui/config-writer.js +101 -0
  202. package/dist/cli/tui/config-writer.js.map +1 -0
  203. package/dist/cli/tui/detect-helpers.d.ts +6 -0
  204. package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
  205. package/dist/cli/tui/detect-helpers.js +45 -0
  206. package/dist/cli/tui/detect-helpers.js.map +1 -0
  207. package/dist/cli/tui/extras-prompt.d.ts +7 -0
  208. package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
  209. package/dist/cli/tui/extras-prompt.js +42 -0
  210. package/dist/cli/tui/extras-prompt.js.map +1 -0
  211. package/dist/cli/tui/flags-types.d.ts +19 -0
  212. package/dist/cli/tui/flags-types.d.ts.map +1 -0
  213. package/dist/cli/tui/flags-types.js +23 -0
  214. package/dist/cli/tui/flags-types.js.map +1 -0
  215. package/dist/cli/tui/flags.d.ts +5 -0
  216. package/dist/cli/tui/flags.d.ts.map +1 -0
  217. package/dist/cli/tui/flags.js +132 -0
  218. package/dist/cli/tui/flags.js.map +1 -0
  219. package/dist/cli/tui/format.d.ts +14 -0
  220. package/dist/cli/tui/format.d.ts.map +1 -0
  221. package/dist/cli/tui/format.js +37 -0
  222. package/dist/cli/tui/format.js.map +1 -0
  223. package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
  224. package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
  225. package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
  226. package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
  227. package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
  228. package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
  229. package/dist/cli/tui/hooks/useInstall.js +90 -0
  230. package/dist/cli/tui/hooks/useInstall.js.map +1 -0
  231. package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
  232. package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
  233. package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
  234. package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
  235. package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
  236. package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
  237. package/dist/cli/tui/hooks/useVerify.js +71 -0
  238. package/dist/cli/tui/hooks/useVerify.js.map +1 -0
  239. package/dist/cli/tui/ink-init.d.ts +2 -0
  240. package/dist/cli/tui/ink-init.d.ts.map +1 -0
  241. package/dist/cli/tui/ink-init.js +198 -0
  242. package/dist/cli/tui/ink-init.js.map +1 -0
  243. package/dist/cli/tui/reporter-auto.d.ts +7 -0
  244. package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
  245. package/dist/cli/tui/reporter-auto.js +15 -0
  246. package/dist/cli/tui/reporter-auto.js.map +1 -0
  247. package/dist/cli/tui/reporter.d.ts +26 -0
  248. package/dist/cli/tui/reporter.d.ts.map +1 -0
  249. package/dist/cli/tui/reporter.js +32 -0
  250. package/dist/cli/tui/reporter.js.map +1 -0
  251. package/dist/cli/tui/run-command.d.ts +14 -0
  252. package/dist/cli/tui/run-command.d.ts.map +1 -0
  253. package/dist/cli/tui/run-command.js +72 -0
  254. package/dist/cli/tui/run-command.js.map +1 -0
  255. package/dist/cli/tui/select-agents.d.ts +6 -0
  256. package/dist/cli/tui/select-agents.d.ts.map +1 -0
  257. package/dist/cli/tui/select-agents.js +32 -0
  258. package/dist/cli/tui/select-agents.js.map +1 -0
  259. package/dist/cli/tui/status-agents.d.ts +11 -0
  260. package/dist/cli/tui/status-agents.d.ts.map +1 -0
  261. package/dist/cli/tui/status-agents.js +53 -0
  262. package/dist/cli/tui/status-agents.js.map +1 -0
  263. package/dist/cli/tui/status-cache.d.ts +6 -0
  264. package/dist/cli/tui/status-cache.d.ts.map +1 -0
  265. package/dist/cli/tui/status-cache.js +39 -0
  266. package/dist/cli/tui/status-cache.js.map +1 -0
  267. package/dist/cli/tui/status-format.d.ts +14 -0
  268. package/dist/cli/tui/status-format.d.ts.map +1 -0
  269. package/dist/cli/tui/status-format.js +41 -0
  270. package/dist/cli/tui/status-format.js.map +1 -0
  271. package/dist/cli/tui/status-python.d.ts +6 -0
  272. package/dist/cli/tui/status-python.d.ts.map +1 -0
  273. package/dist/cli/tui/status-python.js +30 -0
  274. package/dist/cli/tui/status-python.js.map +1 -0
  275. package/dist/cli/tui/system-check.d.ts +24 -0
  276. package/dist/cli/tui/system-check.d.ts.map +1 -0
  277. package/dist/cli/tui/system-check.js +103 -0
  278. package/dist/cli/tui/system-check.js.map +1 -0
  279. package/dist/cli/tui/tui-reporter.d.ts +19 -0
  280. package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
  281. package/dist/cli/tui/tui-reporter.js +95 -0
  282. package/dist/cli/tui/tui-reporter.js.map +1 -0
  283. package/dist/cli/tui/utils/config-writer.d.ts +3 -0
  284. package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
  285. package/dist/cli/tui/utils/config-writer.js +22 -0
  286. package/dist/cli/tui/utils/config-writer.js.map +1 -0
  287. package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
  288. package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
  289. package/dist/cli/tui/utils/suppress-logs.js +11 -0
  290. package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
  291. package/dist/cli/tui/verify-suggestions.d.ts +5 -0
  292. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
  293. package/dist/cli/tui/verify-suggestions.js +20 -0
  294. package/dist/cli/tui/verify-suggestions.js.map +1 -0
  295. package/dist/cli/tui/verify.d.ts +14 -0
  296. package/dist/cli/tui/verify.d.ts.map +1 -0
  297. package/dist/cli/tui/verify.js +101 -0
  298. package/dist/cli/tui/verify.js.map +1 -0
  299. package/dist/cli/tui/version.d.ts +2 -0
  300. package/dist/cli/tui/version.d.ts.map +1 -0
  301. package/dist/cli/tui/version.js +14 -0
  302. package/dist/cli/tui/version.js.map +1 -0
  303. package/dist/cli/uninstall.d.ts +2 -0
  304. package/dist/cli/uninstall.d.ts.map +1 -0
  305. package/dist/cli/uninstall.js +57 -0
  306. package/dist/cli/uninstall.js.map +1 -0
  307. package/dist/cli/warmup.d.ts +10 -2
  308. package/dist/cli/warmup.d.ts.map +1 -1
  309. package/dist/cli/warmup.js +226 -93
  310. package/dist/cli/warmup.js.map +1 -1
  311. package/dist/config.d.ts +28 -2
  312. package/dist/config.d.ts.map +1 -1
  313. package/dist/config.js +106 -56
  314. package/dist/config.js.map +1 -1
  315. package/dist/crawl/crawler.d.ts +6 -0
  316. package/dist/crawl/crawler.d.ts.map +1 -1
  317. package/dist/crawl/crawler.js +210 -209
  318. package/dist/crawl/crawler.js.map +1 -1
  319. package/dist/crawl/dedup.d.ts +1 -0
  320. package/dist/crawl/dedup.d.ts.map +1 -1
  321. package/dist/crawl/dedup.js +124 -81
  322. package/dist/crawl/dedup.js.map +1 -1
  323. package/dist/crawl/etag-incremental.d.ts +43 -0
  324. package/dist/crawl/etag-incremental.d.ts.map +1 -0
  325. package/dist/crawl/etag-incremental.js +94 -0
  326. package/dist/crawl/etag-incremental.js.map +1 -0
  327. package/dist/crawl/index-to-vec.d.ts +10 -0
  328. package/dist/crawl/index-to-vec.d.ts.map +1 -0
  329. package/dist/crawl/index-to-vec.js +44 -0
  330. package/dist/crawl/index-to-vec.js.map +1 -0
  331. package/dist/crawl/mapper.js +136 -164
  332. package/dist/crawl/mapper.js.map +1 -1
  333. package/dist/crawl/rate-limiter.js +63 -66
  334. package/dist/crawl/rate-limiter.js.map +1 -1
  335. package/dist/crawl/robots.js +58 -57
  336. package/dist/crawl/robots.js.map +1 -1
  337. package/dist/crawl/sitemap-first.d.ts +12 -0
  338. package/dist/crawl/sitemap-first.d.ts.map +1 -0
  339. package/dist/crawl/sitemap-first.js +47 -0
  340. package/dist/crawl/sitemap-first.js.map +1 -0
  341. package/dist/crawl/sitemap.js +33 -32
  342. package/dist/crawl/sitemap.js.map +1 -1
  343. package/dist/crawl/url-utils.d.ts +1 -0
  344. package/dist/crawl/url-utils.d.ts.map +1 -1
  345. package/dist/crawl/url-utils.js +49 -37
  346. package/dist/crawl/url-utils.js.map +1 -1
  347. package/dist/daemon/health-check.d.ts +16 -0
  348. package/dist/daemon/health-check.d.ts.map +1 -0
  349. package/dist/daemon/health-check.js +33 -0
  350. package/dist/daemon/health-check.js.map +1 -0
  351. package/dist/daemon/http-server.d.ts +26 -0
  352. package/dist/daemon/http-server.d.ts.map +1 -0
  353. package/dist/daemon/http-server.js +275 -0
  354. package/dist/daemon/http-server.js.map +1 -0
  355. package/dist/daemon/proxy.d.ts +10 -0
  356. package/dist/daemon/proxy.d.ts.map +1 -0
  357. package/dist/daemon/proxy.js +93 -0
  358. package/dist/daemon/proxy.js.map +1 -0
  359. package/dist/embedding/embed.d.ts +59 -0
  360. package/dist/embedding/embed.d.ts.map +1 -0
  361. package/dist/embedding/embed.js +233 -0
  362. package/dist/embedding/embed.js.map +1 -0
  363. package/dist/embedding/fastembed-provider.d.ts +19 -0
  364. package/dist/embedding/fastembed-provider.d.ts.map +1 -0
  365. package/dist/embedding/fastembed-provider.js +51 -0
  366. package/dist/embedding/fastembed-provider.js.map +1 -0
  367. package/dist/embedding/key-terms.d.ts +12 -0
  368. package/dist/embedding/key-terms.d.ts.map +1 -0
  369. package/dist/embedding/key-terms.js +234 -0
  370. package/dist/embedding/key-terms.js.map +1 -0
  371. package/dist/extraction/boilerplate.d.ts +15 -0
  372. package/dist/extraction/boilerplate.d.ts.map +1 -0
  373. package/dist/extraction/boilerplate.js +52 -0
  374. package/dist/extraction/boilerplate.js.map +1 -0
  375. package/dist/extraction/defuddle.d.ts.map +1 -1
  376. package/dist/extraction/defuddle.js +27 -23
  377. package/dist/extraction/defuddle.js.map +1 -1
  378. package/dist/extraction/extract.d.ts.map +1 -1
  379. package/dist/extraction/extract.js +76 -76
  380. package/dist/extraction/extract.js.map +1 -1
  381. package/dist/extraction/jsonld.js +50 -54
  382. package/dist/extraction/jsonld.js.map +1 -1
  383. package/dist/extraction/lang-hints.d.ts +2 -0
  384. package/dist/extraction/lang-hints.d.ts.map +1 -0
  385. package/dist/extraction/lang-hints.js +30 -0
  386. package/dist/extraction/lang-hints.js.map +1 -0
  387. package/dist/extraction/llm-fallback.d.ts +17 -0
  388. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  389. package/dist/extraction/llm-fallback.js +130 -0
  390. package/dist/extraction/llm-fallback.js.map +1 -0
  391. package/dist/extraction/markdown-sanitize.d.ts +2 -0
  392. package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
  393. package/dist/extraction/markdown-sanitize.js +151 -0
  394. package/dist/extraction/markdown-sanitize.js.map +1 -0
  395. package/dist/extraction/markdown.d.ts +11 -0
  396. package/dist/extraction/markdown.d.ts.map +1 -1
  397. package/dist/extraction/markdown.js +195 -91
  398. package/dist/extraction/markdown.js.map +1 -1
  399. package/dist/extraction/pipeline.d.ts +8 -0
  400. package/dist/extraction/pipeline.d.ts.map +1 -1
  401. package/dist/extraction/pipeline.js +57 -91
  402. package/dist/extraction/pipeline.js.map +1 -1
  403. package/dist/extraction/readability.d.ts +1 -1
  404. package/dist/extraction/readability.d.ts.map +1 -1
  405. package/dist/extraction/readability.js +28 -29
  406. package/dist/extraction/readability.js.map +1 -1
  407. package/dist/extraction/schema.d.ts +12 -0
  408. package/dist/extraction/schema.d.ts.map +1 -1
  409. package/dist/extraction/schema.js +135 -72
  410. package/dist/extraction/schema.js.map +1 -1
  411. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  412. package/dist/extraction/site-extractors/docs-generic.js +81 -91
  413. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  414. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  415. package/dist/extraction/site-extractors/github.js +87 -95
  416. package/dist/extraction/site-extractors/github.js.map +1 -1
  417. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  418. package/dist/extraction/site-extractors/mdn.js +46 -54
  419. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  420. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  421. package/dist/extraction/site-extractors/stackoverflow.js +71 -80
  422. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  423. package/dist/extraction/structured-data.d.ts +4 -0
  424. package/dist/extraction/structured-data.d.ts.map +1 -0
  425. package/dist/extraction/structured-data.js +173 -0
  426. package/dist/extraction/structured-data.js.map +1 -0
  427. package/dist/extraction/structured.d.ts +4 -0
  428. package/dist/extraction/structured.d.ts.map +1 -0
  429. package/dist/extraction/structured.js +163 -0
  430. package/dist/extraction/structured.js.map +1 -0
  431. package/dist/extraction/v1/classifier.d.ts +3 -0
  432. package/dist/extraction/v1/classifier.d.ts.map +1 -0
  433. package/dist/extraction/v1/classifier.js +110 -0
  434. package/dist/extraction/v1/classifier.js.map +1 -0
  435. package/dist/extraction/v1/extract-provider.d.ts +16 -0
  436. package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
  437. package/dist/extraction/v1/extract-provider.js +43 -0
  438. package/dist/extraction/v1/extract-provider.js.map +1 -0
  439. package/dist/extraction/v1/local-llm.d.ts +8 -0
  440. package/dist/extraction/v1/local-llm.d.ts.map +1 -0
  441. package/dist/extraction/v1/local-llm.js +58 -0
  442. package/dist/extraction/v1/local-llm.js.map +1 -0
  443. package/dist/extraction/v1/news.d.ts +3 -0
  444. package/dist/extraction/v1/news.d.ts.map +1 -0
  445. package/dist/extraction/v1/news.js +61 -0
  446. package/dist/extraction/v1/news.js.map +1 -0
  447. package/dist/extraction/v1/product.d.ts +3 -0
  448. package/dist/extraction/v1/product.d.ts.map +1 -0
  449. package/dist/extraction/v1/product.js +166 -0
  450. package/dist/extraction/v1/product.js.map +1 -0
  451. package/dist/extraction/v1/recipe.d.ts +3 -0
  452. package/dist/extraction/v1/recipe.d.ts.map +1 -0
  453. package/dist/extraction/v1/recipe.js +136 -0
  454. package/dist/extraction/v1/recipe.js.map +1 -0
  455. package/dist/extraction/v1/routed.d.ts +17 -0
  456. package/dist/extraction/v1/routed.d.ts.map +1 -0
  457. package/dist/extraction/v1/routed.js +68 -0
  458. package/dist/extraction/v1/routed.js.map +1 -0
  459. package/dist/extraction/v1/schemas/Article.d.ts +11 -0
  460. package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
  461. package/dist/extraction/v1/schemas/Article.js +23 -0
  462. package/dist/extraction/v1/schemas/Article.js.map +1 -0
  463. package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
  464. package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
  465. package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
  466. package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
  467. package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
  468. package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
  469. package/dist/extraction/v1/schemas/EventListing.js +122 -0
  470. package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
  471. package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
  472. package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
  473. package/dist/extraction/v1/schemas/Paper.js +156 -0
  474. package/dist/extraction/v1/schemas/Paper.js.map +1 -0
  475. package/dist/extraction/v1/schemas/Product.d.ts +17 -0
  476. package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
  477. package/dist/extraction/v1/schemas/Product.js +149 -0
  478. package/dist/extraction/v1/schemas/Product.js.map +1 -0
  479. package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
  480. package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
  481. package/dist/extraction/v1/schemas/Recipe.js +160 -0
  482. package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
  483. package/dist/extraction/v1/schemas/index.d.ts +13 -0
  484. package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
  485. package/dist/extraction/v1/schemas/index.js +44 -0
  486. package/dist/extraction/v1/schemas/index.js.map +1 -0
  487. package/dist/extraction/v1/site-extractors.d.ts +5 -0
  488. package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
  489. package/dist/extraction/v1/site-extractors.js +31 -0
  490. package/dist/extraction/v1/site-extractors.js.map +1 -0
  491. package/dist/fetch/action-executor.d.ts +28 -0
  492. package/dist/fetch/action-executor.d.ts.map +1 -0
  493. package/dist/fetch/action-executor.js +88 -0
  494. package/dist/fetch/action-executor.js.map +1 -0
  495. package/dist/fetch/auth.d.ts +2 -1
  496. package/dist/fetch/auth.d.ts.map +1 -1
  497. package/dist/fetch/auth.js +56 -26
  498. package/dist/fetch/auth.js.map +1 -1
  499. package/dist/fetch/browser-pool.d.ts +30 -11
  500. package/dist/fetch/browser-pool.d.ts.map +1 -1
  501. package/dist/fetch/browser-pool.js +303 -127
  502. package/dist/fetch/browser-pool.js.map +1 -1
  503. package/dist/fetch/browser-selector.d.ts +17 -0
  504. package/dist/fetch/browser-selector.d.ts.map +1 -0
  505. package/dist/fetch/browser-selector.js +72 -0
  506. package/dist/fetch/browser-selector.js.map +1 -0
  507. package/dist/fetch/browser-types.d.ts +3 -0
  508. package/dist/fetch/browser-types.d.ts.map +1 -0
  509. package/dist/fetch/browser-types.js +45 -0
  510. package/dist/fetch/browser-types.js.map +1 -0
  511. package/dist/fetch/cdp-client.d.ts +9 -0
  512. package/dist/fetch/cdp-client.d.ts.map +1 -0
  513. package/dist/fetch/cdp-client.js +89 -0
  514. package/dist/fetch/cdp-client.js.map +1 -0
  515. package/dist/fetch/content-check.js +39 -46
  516. package/dist/fetch/content-check.js.map +1 -1
  517. package/dist/fetch/http-client.d.ts +4 -0
  518. package/dist/fetch/http-client.d.ts.map +1 -1
  519. package/dist/fetch/http-client.js +147 -128
  520. package/dist/fetch/http-client.js.map +1 -1
  521. package/dist/fetch/lightpanda.d.ts +28 -0
  522. package/dist/fetch/lightpanda.d.ts.map +1 -0
  523. package/dist/fetch/lightpanda.js +174 -0
  524. package/dist/fetch/lightpanda.js.map +1 -0
  525. package/dist/fetch/playwright-tier.d.ts +19 -0
  526. package/dist/fetch/playwright-tier.d.ts.map +1 -0
  527. package/dist/fetch/playwright-tier.js +76 -0
  528. package/dist/fetch/playwright-tier.js.map +1 -0
  529. package/dist/fetch/router.d.ts +49 -3
  530. package/dist/fetch/router.d.ts.map +1 -1
  531. package/dist/fetch/router.js +185 -81
  532. package/dist/fetch/router.js.map +1 -1
  533. package/dist/index.js +97 -17
  534. package/dist/index.js.map +1 -1
  535. package/dist/instructions.d.ts +31 -0
  536. package/dist/instructions.d.ts.map +1 -0
  537. package/dist/instructions.js +245 -0
  538. package/dist/instructions.js.map +1 -0
  539. package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
  540. package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
  541. package/dist/integrations/cloud/llm/anthropic.js +41 -0
  542. package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
  543. package/dist/integrations/cloud/llm/cache.d.ts +5 -0
  544. package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
  545. package/dist/integrations/cloud/llm/cache.js +49 -0
  546. package/dist/integrations/cloud/llm/cache.js.map +1 -0
  547. package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
  548. package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
  549. package/dist/integrations/cloud/llm/gemini.js +37 -0
  550. package/dist/integrations/cloud/llm/gemini.js.map +1 -0
  551. package/dist/integrations/cloud/llm/groq.d.ts +3 -0
  552. package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
  553. package/dist/integrations/cloud/llm/groq.js +74 -0
  554. package/dist/integrations/cloud/llm/groq.js.map +1 -0
  555. package/dist/integrations/cloud/llm/hash.d.ts +3 -0
  556. package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
  557. package/dist/integrations/cloud/llm/hash.js +26 -0
  558. package/dist/integrations/cloud/llm/hash.js.map +1 -0
  559. package/dist/integrations/cloud/llm/openai.d.ts +3 -0
  560. package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
  561. package/dist/integrations/cloud/llm/openai.js +43 -0
  562. package/dist/integrations/cloud/llm/openai.js.map +1 -0
  563. package/dist/integrations/cloud/llm/select.d.ts +5 -0
  564. package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
  565. package/dist/integrations/cloud/llm/select.js +30 -0
  566. package/dist/integrations/cloud/llm/select.js.map +1 -0
  567. package/dist/integrations/cloud/llm/types.d.ts +24 -0
  568. package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
  569. package/dist/integrations/cloud/llm/types.js +1 -0
  570. package/dist/integrations/cloud/llm/types.js.map +1 -0
  571. package/dist/integrations/cloud/llm/validate.d.ts +6 -0
  572. package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
  573. package/dist/integrations/cloud/llm/validate.js +63 -0
  574. package/dist/integrations/cloud/llm/validate.js.map +1 -0
  575. package/dist/logger.d.ts +4 -1
  576. package/dist/logger.d.ts.map +1 -1
  577. package/dist/logger.js +71 -30
  578. package/dist/logger.js.map +1 -1
  579. package/dist/pdf-parse.d.js +1 -0
  580. package/dist/pdf-parse.d.js.map +1 -0
  581. package/dist/plugins/loader.d.ts +20 -0
  582. package/dist/plugins/loader.d.ts.map +1 -0
  583. package/dist/plugins/loader.js +157 -0
  584. package/dist/plugins/loader.js.map +1 -0
  585. package/dist/plugins/registry.d.ts +26 -0
  586. package/dist/plugins/registry.d.ts.map +1 -0
  587. package/dist/plugins/registry.js +71 -0
  588. package/dist/plugins/registry.js.map +1 -0
  589. package/dist/plugins/validate.d.ts +9 -0
  590. package/dist/plugins/validate.d.ts.map +1 -0
  591. package/dist/plugins/validate.js +79 -0
  592. package/dist/plugins/validate.js.map +1 -0
  593. package/dist/providers/embed-provider.d.ts +11 -0
  594. package/dist/providers/embed-provider.d.ts.map +1 -0
  595. package/dist/providers/embed-provider.js +24 -0
  596. package/dist/providers/embed-provider.js.map +1 -0
  597. package/dist/providers/extract-provider.d.ts +23 -0
  598. package/dist/providers/extract-provider.d.ts.map +1 -0
  599. package/dist/providers/extract-provider.js +25 -0
  600. package/dist/providers/extract-provider.js.map +1 -0
  601. package/dist/providers/rerank-provider.d.ts +16 -0
  602. package/dist/providers/rerank-provider.d.ts.map +1 -0
  603. package/dist/providers/rerank-provider.js +28 -0
  604. package/dist/providers/rerank-provider.js.map +1 -0
  605. package/dist/providers/search-provider.d.ts +25 -0
  606. package/dist/providers/search-provider.d.ts.map +1 -0
  607. package/dist/providers/search-provider.js +44 -0
  608. package/dist/providers/search-provider.js.map +1 -0
  609. package/dist/providers/vector-store.d.ts +27 -0
  610. package/dist/providers/vector-store.d.ts.map +1 -0
  611. package/dist/providers/vector-store.js +27 -0
  612. package/dist/providers/vector-store.js.map +1 -0
  613. package/dist/python-env.d.ts +9 -0
  614. package/dist/python-env.d.ts.map +1 -0
  615. package/dist/python-env.js +13 -0
  616. package/dist/python-env.js.map +1 -0
  617. package/dist/repl/commands/agent.d.ts +5 -0
  618. package/dist/repl/commands/agent.d.ts.map +1 -0
  619. package/dist/repl/commands/agent.js +62 -0
  620. package/dist/repl/commands/agent.js.map +1 -0
  621. package/dist/repl/commands/cache.d.ts +4 -0
  622. package/dist/repl/commands/cache.d.ts.map +1 -0
  623. package/dist/repl/commands/cache.js +43 -0
  624. package/dist/repl/commands/cache.js.map +1 -0
  625. package/dist/repl/commands/crawl.d.ts +7 -0
  626. package/dist/repl/commands/crawl.d.ts.map +1 -0
  627. package/dist/repl/commands/crawl.js +44 -0
  628. package/dist/repl/commands/crawl.js.map +1 -0
  629. package/dist/repl/commands/extract.d.ts +5 -0
  630. package/dist/repl/commands/extract.d.ts.map +1 -0
  631. package/dist/repl/commands/extract.js +47 -0
  632. package/dist/repl/commands/extract.js.map +1 -0
  633. package/dist/repl/commands/fetch.d.ts +5 -0
  634. package/dist/repl/commands/fetch.d.ts.map +1 -0
  635. package/dist/repl/commands/fetch.js +67 -0
  636. package/dist/repl/commands/fetch.js.map +1 -0
  637. package/dist/repl/commands/find-similar.d.ts +5 -0
  638. package/dist/repl/commands/find-similar.d.ts.map +1 -0
  639. package/dist/repl/commands/find-similar.js +74 -0
  640. package/dist/repl/commands/find-similar.js.map +1 -0
  641. package/dist/repl/commands/research.d.ts +5 -0
  642. package/dist/repl/commands/research.d.ts.map +1 -0
  643. package/dist/repl/commands/research.js +65 -0
  644. package/dist/repl/commands/research.js.map +1 -0
  645. package/dist/repl/commands/search.d.ts +5 -0
  646. package/dist/repl/commands/search.d.ts.map +1 -0
  647. package/dist/repl/commands/search.js +74 -0
  648. package/dist/repl/commands/search.js.map +1 -0
  649. package/dist/repl/commands/types.d.ts +9 -0
  650. package/dist/repl/commands/types.d.ts.map +1 -0
  651. package/dist/repl/commands/types.js +1 -0
  652. package/dist/repl/commands/types.js.map +1 -0
  653. package/dist/repl/formatters.d.ts +13 -0
  654. package/dist/repl/formatters.d.ts.map +1 -0
  655. package/dist/repl/formatters.js +283 -0
  656. package/dist/repl/formatters.js.map +1 -0
  657. package/dist/repl/parser.d.ts +9 -0
  658. package/dist/repl/parser.d.ts.map +1 -0
  659. package/dist/repl/parser.js +86 -0
  660. package/dist/repl/parser.js.map +1 -0
  661. package/dist/repl/shell.d.ts +8 -0
  662. package/dist/repl/shell.d.ts.map +1 -0
  663. package/dist/repl/shell.js +184 -0
  664. package/dist/repl/shell.js.map +1 -0
  665. package/dist/research/branch-exploration.d.ts +14 -0
  666. package/dist/research/branch-exploration.d.ts.map +1 -0
  667. package/dist/research/branch-exploration.js +100 -0
  668. package/dist/research/branch-exploration.js.map +1 -0
  669. package/dist/research/brief.d.ts +5 -0
  670. package/dist/research/brief.d.ts.map +1 -0
  671. package/dist/research/brief.js +242 -0
  672. package/dist/research/brief.js.map +1 -0
  673. package/dist/research/citation-graph.d.ts +9 -0
  674. package/dist/research/citation-graph.d.ts.map +1 -0
  675. package/dist/research/citation-graph.js +114 -0
  676. package/dist/research/citation-graph.js.map +1 -0
  677. package/dist/research/decompose.d.ts +14 -0
  678. package/dist/research/decompose.d.ts.map +1 -0
  679. package/dist/research/decompose.js +439 -0
  680. package/dist/research/decompose.js.map +1 -0
  681. package/dist/research/pipeline.d.ts +5 -0
  682. package/dist/research/pipeline.d.ts.map +1 -0
  683. package/dist/research/pipeline.js +269 -0
  684. package/dist/research/pipeline.js.map +1 -0
  685. package/dist/research/synthesis-local.d.ts +16 -0
  686. package/dist/research/synthesis-local.d.ts.map +1 -0
  687. package/dist/research/synthesis-local.js +73 -0
  688. package/dist/research/synthesis-local.js.map +1 -0
  689. package/dist/research/synthesize.d.ts +10 -0
  690. package/dist/research/synthesize.d.ts.map +1 -0
  691. package/dist/research/synthesize.js +137 -0
  692. package/dist/research/synthesize.js.map +1 -0
  693. package/dist/search/answer-synthesis.d.ts +33 -0
  694. package/dist/search/answer-synthesis.d.ts.map +1 -0
  695. package/dist/search/answer-synthesis.js +244 -0
  696. package/dist/search/answer-synthesis.js.map +1 -0
  697. package/dist/search/context-formatter.d.ts +3 -0
  698. package/dist/search/context-formatter.d.ts.map +1 -0
  699. package/dist/search/context-formatter.js +56 -0
  700. package/dist/search/context-formatter.js.map +1 -0
  701. package/dist/search/dedup.d.ts +1 -0
  702. package/dist/search/dedup.d.ts.map +1 -1
  703. package/dist/search/dedup.js +40 -32
  704. package/dist/search/dedup.js.map +1 -1
  705. package/dist/search/engines/arxiv.d.ts +7 -0
  706. package/dist/search/engines/arxiv.d.ts.map +1 -0
  707. package/dist/search/engines/arxiv.js +70 -0
  708. package/dist/search/engines/arxiv.js.map +1 -0
  709. package/dist/search/engines/bing-news.d.ts +7 -0
  710. package/dist/search/engines/bing-news.d.ts.map +1 -0
  711. package/dist/search/engines/bing-news.js +97 -0
  712. package/dist/search/engines/bing-news.js.map +1 -0
  713. package/dist/search/engines/bing.d.ts +1 -0
  714. package/dist/search/engines/bing.d.ts.map +1 -1
  715. package/dist/search/engines/bing.js +100 -44
  716. package/dist/search/engines/bing.js.map +1 -1
  717. package/dist/search/engines/devdocs.d.ts +6 -0
  718. package/dist/search/engines/devdocs.d.ts.map +1 -0
  719. package/dist/search/engines/devdocs.js +56 -0
  720. package/dist/search/engines/devdocs.js.map +1 -0
  721. package/dist/search/engines/duckduckgo.d.ts.map +1 -1
  722. package/dist/search/engines/duckduckgo.js +56 -44
  723. package/dist/search/engines/duckduckgo.js.map +1 -1
  724. package/dist/search/engines/github-code.d.ts +7 -0
  725. package/dist/search/engines/github-code.d.ts.map +1 -0
  726. package/dist/search/engines/github-code.js +55 -0
  727. package/dist/search/engines/github-code.js.map +1 -0
  728. package/dist/search/engines/hn-algolia.d.ts +7 -0
  729. package/dist/search/engines/hn-algolia.d.ts.map +1 -0
  730. package/dist/search/engines/hn-algolia.js +76 -0
  731. package/dist/search/engines/hn-algolia.js.map +1 -0
  732. package/dist/search/engines/lobsters.d.ts +7 -0
  733. package/dist/search/engines/lobsters.d.ts.map +1 -0
  734. package/dist/search/engines/lobsters.js +83 -0
  735. package/dist/search/engines/lobsters.js.map +1 -0
  736. package/dist/search/engines/mdn.d.ts +7 -0
  737. package/dist/search/engines/mdn.d.ts.map +1 -0
  738. package/dist/search/engines/mdn.js +48 -0
  739. package/dist/search/engines/mdn.js.map +1 -0
  740. package/dist/search/engines/semantic-scholar.d.ts +7 -0
  741. package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
  742. package/dist/search/engines/semantic-scholar.js +69 -0
  743. package/dist/search/engines/semantic-scholar.js.map +1 -0
  744. package/dist/search/engines/stackoverflow.d.ts +7 -0
  745. package/dist/search/engines/stackoverflow.d.ts.map +1 -0
  746. package/dist/search/engines/stackoverflow.js +73 -0
  747. package/dist/search/engines/stackoverflow.js.map +1 -0
  748. package/dist/search/engines/startpage.d.ts.map +1 -1
  749. package/dist/search/engines/startpage.js +65 -46
  750. package/dist/search/engines/startpage.js.map +1 -1
  751. package/dist/search/evidence.d.ts +25 -0
  752. package/dist/search/evidence.d.ts.map +1 -0
  753. package/dist/search/evidence.js +220 -0
  754. package/dist/search/evidence.js.map +1 -0
  755. package/dist/search/filters.js +49 -55
  756. package/dist/search/filters.js.map +1 -1
  757. package/dist/search/find-similar/crawl-rank.d.ts +9 -0
  758. package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
  759. package/dist/search/find-similar/crawl-rank.js +272 -0
  760. package/dist/search/find-similar/crawl-rank.js.map +1 -0
  761. package/dist/search/find-similar/mode.d.ts +4 -0
  762. package/dist/search/find-similar/mode.d.ts.map +1 -0
  763. package/dist/search/find-similar/mode.js +12 -0
  764. package/dist/search/find-similar/mode.js.map +1 -0
  765. package/dist/search/find-similar.d.ts +5 -0
  766. package/dist/search/find-similar.d.ts.map +1 -0
  767. package/dist/search/find-similar.js +509 -0
  768. package/dist/search/find-similar.js.map +1 -0
  769. package/dist/search/highlights.d.ts +19 -0
  770. package/dist/search/highlights.d.ts.map +1 -0
  771. package/dist/search/highlights.js +167 -0
  772. package/dist/search/highlights.js.map +1 -0
  773. package/dist/search/language-filter.d.ts +29 -0
  774. package/dist/search/language-filter.d.ts.map +1 -0
  775. package/dist/search/language-filter.js +126 -0
  776. package/dist/search/language-filter.js.map +1 -0
  777. package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
  778. package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
  779. package/dist/search/legacy/searxng-orchestrator.js +501 -0
  780. package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
  781. package/dist/search/legacy/searxng-provider.d.ts +7 -0
  782. package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
  783. package/dist/search/legacy/searxng-provider.js +11 -0
  784. package/dist/search/legacy/searxng-provider.js.map +1 -0
  785. package/dist/search/multi-query.d.ts +25 -0
  786. package/dist/search/multi-query.d.ts.map +1 -0
  787. package/dist/search/multi-query.js +228 -0
  788. package/dist/search/multi-query.js.map +1 -0
  789. package/dist/search/query.js +32 -34
  790. package/dist/search/query.js.map +1 -1
  791. package/dist/search/rerank.d.ts +3 -1
  792. package/dist/search/rerank.d.ts.map +1 -1
  793. package/dist/search/rerank.js +44 -35
  794. package/dist/search/rerank.js.map +1 -1
  795. package/dist/search/reranker/authority-boost.d.ts +3 -0
  796. package/dist/search/reranker/authority-boost.d.ts.map +1 -0
  797. package/dist/search/reranker/authority-boost.js +179 -0
  798. package/dist/search/reranker/authority-boost.js.map +1 -0
  799. package/dist/search/reranker/consensus-boost.d.ts +3 -0
  800. package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
  801. package/dist/search/reranker/consensus-boost.js +27 -0
  802. package/dist/search/reranker/consensus-boost.js.map +1 -0
  803. package/dist/search/reranker/recency-boost.d.ts +3 -0
  804. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  805. package/dist/search/reranker/recency-boost.js +13 -0
  806. package/dist/search/reranker/recency-boost.js.map +1 -0
  807. package/dist/search/reranker/recency.d.ts +3 -0
  808. package/dist/search/reranker/recency.d.ts.map +1 -0
  809. package/dist/search/reranker/recency.js +23 -0
  810. package/dist/search/reranker/recency.js.map +1 -0
  811. package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
  812. package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
  813. package/dist/search/reranker/transformers-rerank-provider.js +78 -0
  814. package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
  815. package/dist/search/rrf.d.ts +17 -0
  816. package/dist/search/rrf.d.ts.map +1 -0
  817. package/dist/search/rrf.js +39 -0
  818. package/dist/search/rrf.js.map +1 -0
  819. package/dist/search/sampling.d.ts +25 -0
  820. package/dist/search/sampling.d.ts.map +1 -0
  821. package/dist/search/sampling.js +52 -0
  822. package/dist/search/sampling.js.map +1 -0
  823. package/dist/search/searxng.d.ts.map +1 -1
  824. package/dist/search/searxng.js +69 -79
  825. package/dist/search/searxng.js.map +1 -1
  826. package/dist/search/tokens.d.ts +3 -0
  827. package/dist/search/tokens.d.ts.map +1 -0
  828. package/dist/search/tokens.js +39 -0
  829. package/dist/search/tokens.js.map +1 -0
  830. package/dist/search/truncate.d.ts +6 -0
  831. package/dist/search/truncate.d.ts.map +1 -0
  832. package/dist/search/truncate.js +26 -0
  833. package/dist/search/truncate.js.map +1 -0
  834. package/dist/search/url-unwrap.d.ts +3 -0
  835. package/dist/search/url-unwrap.d.ts.map +1 -0
  836. package/dist/search/url-unwrap.js +43 -0
  837. package/dist/search/url-unwrap.js.map +1 -0
  838. package/dist/search/v1/context-rank.d.ts +13 -0
  839. package/dist/search/v1/context-rank.d.ts.map +1 -0
  840. package/dist/search/v1/context-rank.js +74 -0
  841. package/dist/search/v1/context-rank.js.map +1 -0
  842. package/dist/search/v1/engine-base.d.ts +27 -0
  843. package/dist/search/v1/engine-base.d.ts.map +1 -0
  844. package/dist/search/v1/engine-base.js +110 -0
  845. package/dist/search/v1/engine-base.js.map +1 -0
  846. package/dist/search/v1/intent-router.d.ts +22 -0
  847. package/dist/search/v1/intent-router.d.ts.map +1 -0
  848. package/dist/search/v1/intent-router.js +138 -0
  849. package/dist/search/v1/intent-router.js.map +1 -0
  850. package/dist/search/v1/orchestrator.d.ts +24 -0
  851. package/dist/search/v1/orchestrator.d.ts.map +1 -0
  852. package/dist/search/v1/orchestrator.js +163 -0
  853. package/dist/search/v1/orchestrator.js.map +1 -0
  854. package/dist/search/v1/recency-boost.d.ts +9 -0
  855. package/dist/search/v1/recency-boost.d.ts.map +1 -0
  856. package/dist/search/v1/recency-boost.js +37 -0
  857. package/dist/search/v1/recency-boost.js.map +1 -0
  858. package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
  859. package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
  860. package/dist/search/v1/recent-cache-dedup.js +85 -0
  861. package/dist/search/v1/recent-cache-dedup.js.map +1 -0
  862. package/dist/search/v1/rss/feed-config.d.ts +21 -0
  863. package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
  864. package/dist/search/v1/rss/feed-config.js +90 -0
  865. package/dist/search/v1/rss/feed-config.js.map +1 -0
  866. package/dist/search/v1/rss/feed-parser.d.ts +14 -0
  867. package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
  868. package/dist/search/v1/rss/feed-parser.js +104 -0
  869. package/dist/search/v1/rss/feed-parser.js.map +1 -0
  870. package/dist/search/v1/rss/feed-poller.d.ts +22 -0
  871. package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
  872. package/dist/search/v1/rss/feed-poller.js +102 -0
  873. package/dist/search/v1/rss/feed-poller.js.map +1 -0
  874. package/dist/search/v1/rss/feed-store.d.ts +30 -0
  875. package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
  876. package/dist/search/v1/rss/feed-store.js +134 -0
  877. package/dist/search/v1/rss/feed-store.js.map +1 -0
  878. package/dist/search/v1/rss/rss-engine.d.ts +6 -0
  879. package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
  880. package/dist/search/v1/rss/rss-engine.js +28 -0
  881. package/dist/search/v1/rss/rss-engine.js.map +1 -0
  882. package/dist/search/v1/v1-provider.d.ts +7 -0
  883. package/dist/search/v1/v1-provider.d.ts.map +1 -0
  884. package/dist/search/v1/v1-provider.js +68 -0
  885. package/dist/search/v1/v1-provider.js.map +1 -0
  886. package/dist/search/v1/verticals/code.d.ts +4 -0
  887. package/dist/search/v1/verticals/code.d.ts.map +1 -0
  888. package/dist/search/v1/verticals/code.js +20 -0
  889. package/dist/search/v1/verticals/code.js.map +1 -0
  890. package/dist/search/v1/verticals/docs.d.ts +4 -0
  891. package/dist/search/v1/verticals/docs.d.ts.map +1 -0
  892. package/dist/search/v1/verticals/docs.js +20 -0
  893. package/dist/search/v1/verticals/docs.js.map +1 -0
  894. package/dist/search/v1/verticals/general.d.ts +4 -0
  895. package/dist/search/v1/verticals/general.d.ts.map +1 -0
  896. package/dist/search/v1/verticals/general.js +22 -0
  897. package/dist/search/v1/verticals/general.js.map +1 -0
  898. package/dist/search/v1/verticals/news.d.ts +10 -0
  899. package/dist/search/v1/verticals/news.d.ts.map +1 -0
  900. package/dist/search/v1/verticals/news.js +52 -0
  901. package/dist/search/v1/verticals/news.js.map +1 -0
  902. package/dist/search/v1/verticals/papers.d.ts +4 -0
  903. package/dist/search/v1/verticals/papers.d.ts.map +1 -0
  904. package/dist/search/v1/verticals/papers.js +23 -0
  905. package/dist/search/v1/verticals/papers.js.map +1 -0
  906. package/dist/search/validator.js +31 -31
  907. package/dist/search/validator.js.map +1 -1
  908. package/dist/searxng/bootstrap.d.ts +30 -0
  909. package/dist/searxng/bootstrap.d.ts.map +1 -1
  910. package/dist/searxng/bootstrap.js +223 -85
  911. package/dist/searxng/bootstrap.js.map +1 -1
  912. package/dist/searxng/docker.d.ts.map +1 -1
  913. package/dist/searxng/docker.js +69 -60
  914. package/dist/searxng/docker.js.map +1 -1
  915. package/dist/searxng/process.d.ts +13 -1
  916. package/dist/searxng/process.d.ts.map +1 -1
  917. package/dist/searxng/process.js +231 -164
  918. package/dist/searxng/process.js.map +1 -1
  919. package/dist/server/backend-status.d.ts +13 -0
  920. package/dist/server/backend-status.d.ts.map +1 -0
  921. package/dist/server/backend-status.js +40 -0
  922. package/dist/server/backend-status.js.map +1 -0
  923. package/dist/server/tool-schemas.d.ts +549 -0
  924. package/dist/server/tool-schemas.d.ts.map +1 -0
  925. package/dist/server/tool-schemas.js +464 -0
  926. package/dist/server/tool-schemas.js.map +1 -0
  927. package/dist/server/warmup-on-start.d.ts +9 -0
  928. package/dist/server/warmup-on-start.d.ts.map +1 -0
  929. package/dist/server/warmup-on-start.js +55 -0
  930. package/dist/server/warmup-on-start.js.map +1 -0
  931. package/dist/server.d.ts +17 -0
  932. package/dist/server.d.ts.map +1 -1
  933. package/dist/server.js +454 -297
  934. package/dist/server.js.map +1 -1
  935. package/dist/tools/agent.d.ts +5 -0
  936. package/dist/tools/agent.d.ts.map +1 -0
  937. package/dist/tools/agent.js +128 -0
  938. package/dist/tools/agent.js.map +1 -0
  939. package/dist/tools/cache.d.ts +2 -1
  940. package/dist/tools/cache.d.ts.map +1 -1
  941. package/dist/tools/cache.js +175 -44
  942. package/dist/tools/cache.js.map +1 -1
  943. package/dist/tools/crawl.d.ts.map +1 -1
  944. package/dist/tools/crawl.js +171 -88
  945. package/dist/tools/crawl.js.map +1 -1
  946. package/dist/tools/extract.d.ts +2 -2
  947. package/dist/tools/extract.d.ts.map +1 -1
  948. package/dist/tools/extract.js +175 -59
  949. package/dist/tools/extract.js.map +1 -1
  950. package/dist/tools/fetch.d.ts +2 -2
  951. package/dist/tools/fetch.d.ts.map +1 -1
  952. package/dist/tools/fetch.js +161 -68
  953. package/dist/tools/fetch.js.map +1 -1
  954. package/dist/tools/find-similar.d.ts +5 -0
  955. package/dist/tools/find-similar.d.ts.map +1 -0
  956. package/dist/tools/find-similar.js +127 -0
  957. package/dist/tools/find-similar.js.map +1 -0
  958. package/dist/tools/research.d.ts +5 -0
  959. package/dist/tools/research.d.ts.map +1 -0
  960. package/dist/tools/research.js +107 -0
  961. package/dist/tools/research.js.map +1 -0
  962. package/dist/tools/search.d.ts +10 -2
  963. package/dist/tools/search.d.ts.map +1 -1
  964. package/dist/tools/search.js +13 -158
  965. package/dist/tools/search.js.map +1 -1
  966. package/dist/types.d.ts +350 -7
  967. package/dist/types.d.ts.map +1 -1
  968. package/dist/types.js +6 -1
  969. package/dist/types.js.map +1 -1
  970. package/dist/util/mode.d.ts +4 -0
  971. package/dist/util/mode.d.ts.map +1 -0
  972. package/dist/util/mode.js +34 -0
  973. package/dist/util/mode.js.map +1 -0
  974. package/package.json +78 -8
  975. package/dist/extraction/trafilatura.d.ts +0 -6
  976. package/dist/extraction/trafilatura.d.ts.map +0 -1
  977. package/dist/extraction/trafilatura.js +0 -105
  978. package/dist/extraction/trafilatura.js.map +0 -1
  979. package/dist/search/flashrank.d.ts +0 -12
  980. package/dist/search/flashrank.d.ts.map +0 -1
  981. package/dist/search/flashrank.js +0 -63
  982. package/dist/search/flashrank.js.map +0 -1
@@ -0,0 +1,130 @@
1
+ import { getConfig } from "../config.js";
2
+ import { callAnthropic } from "../integrations/cloud/llm/anthropic.js";
3
+ import { callOpenAI } from "../integrations/cloud/llm/openai.js";
4
+ import { callGemini } from "../integrations/cloud/llm/gemini.js";
5
+ import { callGroq } from "../integrations/cloud/llm/groq.js";
6
+ import {
7
+ ensureLLMCacheTable,
8
+ insertLLMCache,
9
+ lookupLLMCache
10
+ } from "../integrations/cloud/llm/cache.js";
11
+ import { hashPrompt, hashSchema } from "../integrations/cloud/llm/hash.js";
12
+ import { allProviders, providerEnvVar, selectProvider } from "../integrations/cloud/llm/select.js";
13
+ import { validateAgainstSchema } from "../integrations/cloud/llm/validate.js";
14
// Size cap handed to truncate() for the HTML that buildPrompt() embeds in the prompt.
const MAX_HTML_BYTES = 50000;

// Provider name -> adapter function. extractWithLLM() looks the adapter up by the
// provider that selectProvider() returns and calls it as adapter(options, apiKey).
const ADAPTERS = {
  anthropic: callAnthropic,
  openai: callOpenAI,
  gemini: callGemini,
  groq: callGroq,
};
21
/**
 * Ask a cloud LLM to fill in extraction fields that are still missing.
 *
 * Provider failures, unreadable cache rows and schema-validation failures are
 * reported through `warnings` (with the caller's partial values returned
 * unchanged) instead of being thrown.
 *
 * @param {object} input
 * @param {string} input.html - Page HTML; truncated by buildPrompt() before prompting.
 * @param {Record<string, unknown>} input.jsonSchema - Schema the LLM answer must satisfy.
 * @param {Record<string, unknown>} input.partial - Values already extracted.
 * @param {string[]} input.missing - Field names that still need a value.
 * @param {AbortSignal} [input.signal] - Forwarded to the provider adapter.
 * @param {{ remaining: number }} [input.budget] - Call budget; decremented in place
 *   after every adapter call, successful or not. Defaults to a fresh budget of
 *   `llmMaxCallsPerRequest` from the config.
 * @returns {Promise<{values: Record<string, unknown>, provider: string, model: string,
 *   cached: boolean, latencyMs: number, warnings: string[]}>}
 */
async function extractWithLLM(input) {
  // Nothing to fill in: no provider lookup, no cache access, no budget spent.
  if (input.missing.length === 0) {
    return emptyResult(input.partial, []);
  }

  const cfg = getConfig();
  const budget = input.budget ?? { remaining: cfg.llmMaxCallsPerRequest };
  if (budget.remaining <= 0) {
    return emptyResult(input.partial, [
      `LLM fallback skipped: per-request budget exhausted (cap ${cfg.llmMaxCallsPerRequest}). Override via WIGOLO_LLM_MAX_CALLS_PER_REQUEST.`
    ]);
  }

  const provider = selectProvider(process.env);
  if (!provider) {
    const envList = allProviders().map((p) => providerEnvVar(p)).join(", ");
    return emptyResult(input.partial, [
      `LLM fallback skipped: no provider key set (${envList}). ${input.missing.length} required field(s) still missing: ${input.missing.join(", ")}.`
    ]);
  }

  const apiKey = process.env[providerEnvVar(provider)];
  const prompt = buildPrompt(input);
  const promptHash = hashPrompt(prompt);
  const schemaHash = hashSchema(input.jsonSchema);
  const modelId = `${provider}:default`;

  // Warnings gathered before the live call; prepended to whatever is returned later.
  const cacheWarnings = [];

  ensureLLMCacheTable();
  const cached = lookupLLMCache(modelId, promptHash, schemaHash);
  if (cached) {
    let cachedValues = null;
    try {
      cachedValues = JSON.parse(cached);
    } catch {
      // JSON.parse throws SyntaxError on malformed text; handled as a miss below.
      cachedValues = null;
    }
    if (cachedValues !== null && typeof cachedValues === "object") {
      // A cache hit does not touch the budget.
      return {
        values: mergeOnlyMissing(input.partial, cachedValues, input.missing),
        provider,
        model: modelId,
        cached: true,
        latencyMs: 0,
        warnings: []
      };
    }
    // Only validated objects are ever written to the cache, so anything else is
    // a damaged row: ignore it and fall through to a live call.
    cacheWarnings.push(
      `LLM cache entry for ${modelId} was unreadable and has been ignored.`
    );
  }

  let result;
  try {
    result = await ADAPTERS[provider](
      { prompt, jsonSchema: input.jsonSchema, signal: input.signal },
      apiKey
    );
  } catch (e) {
    // Anything can be thrown in JavaScript; only Error instances carry `.message`.
    const reason = e instanceof Error ? e.message : String(e);
    return emptyResult(input.partial, [
      ...cacheWarnings,
      `LLM fallback (${provider}) failed: ${reason}`
    ]);
  } finally {
    // The attempt counts against the budget whether it succeeded or threw.
    budget.remaining = Math.max(0, budget.remaining - 1);
  }

  const errors = validateAgainstSchema(result.values, input.jsonSchema);
  if (errors.length > 0) {
    // Invalid answers are neither cached nor merged.
    return emptyResult(input.partial, [
      ...cacheWarnings,
      `LLM fallback (${provider}) response failed schema validation: ${errors.map((e) => `${e.path} ${e.message}`).join("; ")}`
    ]);
  }

  const ttlMs = cfg.llmCacheTtlDays * 24 * 60 * 60 * 1e3;
  const now = Date.now();
  insertLLMCache({
    modelId,
    promptHash,
    schemaHash,
    response: JSON.stringify(result.values),
    createdAt: now,
    expiresAt: now + ttlMs
  });

  return {
    values: mergeOnlyMissing(input.partial, result.values, input.missing),
    provider,
    model: result.model,
    cached: false,
    latencyMs: result.latencyMs,
    warnings: [...cacheWarnings, ...(result.warnings ?? [])]
  };
}
95
/**
 * Build the result returned when no LLM output is merged in.
 *
 * @param {Record<string, unknown>} partial - Values already extracted; shallow-copied.
 * @param {string[]} warnings - Messages to surface to the caller; passed through as-is.
 * @returns {object} Result with a copy of `partial` as `values`, `cached` false,
 *   zero latency, an empty model name and `provider` fixed to "anthropic".
 */
function emptyResult(partial, warnings) {
  // Shallow copy so the caller's object is not shared with the result.
  const snapshot = { ...partial };
  const outcome = {
    values: snapshot,
    provider: "anthropic",
    model: "",
    cached: false,
    latencyMs: 0,
    warnings,
  };
  return outcome;
}
105
/**
 * Copy values for the still-missing fields from `filled` onto a copy of `partial`.
 *
 * Only keys listed in `missing` are considered, only when `filled` has them as
 * its own property, and only when the value is not `undefined`. Neither input
 * object is mutated.
 *
 * @param {Record<string, unknown>} partial - Values already extracted.
 * @param {Record<string, unknown>} filled - Candidate values (LLM or cache output).
 * @param {string[]} missing - Field names allowed to be filled.
 * @returns {Record<string, unknown>} New object with the merged values.
 */
function mergeOnlyMissing(partial, filled, missing) {
  const out = { ...partial };
  // A null or primitive `filled` carries no fields; return the copy unchanged
  // rather than throwing on property access.
  if (filled === null || typeof filled !== "object") {
    return out;
  }
  for (const key of missing) {
    // Own-property check: a field named "constructor" or "toString" must not
    // pick up the built-in inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(filled, key)) continue;
    const value = filled[key];
    if (value !== undefined) out[key] = value;
  }
  return out;
}
112
/**
 * Compose the prompt text for the LLM: instructions, the list of missing
 * fields, then the (size-capped) page HTML.
 *
 * @param {{ html: string, missing: string[] }} input - Uses `html` and `missing`.
 * @returns {string} Newline-separated prompt.
 */
function buildPrompt(input) {
  const htmlExcerpt = truncate(input.html, MAX_HTML_BYTES);
  const missingList = input.missing.join(", ");
  const preamble =
    "Extract the following missing fields from the HTML below.\n" +
    `Missing fields: ${missingList}.\n` +
    "Return JSON matching the provided schema. Do not invent values; if a field is not present in the HTML, omit it.\n";
  // A blank line separates the instructions from the HTML section.
  return `${preamble}\nHTML:\n${htmlExcerpt}`;
}
123
/**
 * Shorten `s` so that its UTF-8 encoding is at most `maxBytes` bytes.
 *
 * The cut always falls on a code-point boundary, so the result never ends in
 * half of a surrogate pair. Strings that already fit are returned unchanged.
 *
 * @param {string} s - Text to shorten.
 * @param {number} maxBytes - Maximum UTF-8 size of the result, in bytes.
 * @returns {string} `s` itself, or a prefix of it.
 */
function truncate(s, maxBytes) {
  // One UTF-16 code unit never needs more than 3 UTF-8 bytes, so short strings
  // are known to fit without encoding anything.
  if (s.length * 3 <= maxBytes) return s;

  const cap = Math.max(0, Math.floor(maxBytes));
  // encodeInto stops before the first code point that does not fit and reports
  // how many UTF-16 code units it consumed.
  const { read } = new TextEncoder().encodeInto(s, new Uint8Array(cap));
  return read >= s.length ? s : s.slice(0, read);
}
127
+ export {
128
+ extractWithLLM
129
+ };
130
+ //# sourceMappingURL=llm-fallback.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/extraction/llm-fallback.ts"],"sourcesContent":["import { getConfig } from '../config.js';\nimport { callAnthropic } from '../integrations/cloud/llm/anthropic.js';\nimport { callOpenAI } from '../integrations/cloud/llm/openai.js';\nimport { callGemini } from '../integrations/cloud/llm/gemini.js';\nimport { callGroq } from '../integrations/cloud/llm/groq.js';\nimport {\n ensureLLMCacheTable,\n insertLLMCache,\n lookupLLMCache,\n} from '../integrations/cloud/llm/cache.js';\nimport { hashPrompt, hashSchema } from '../integrations/cloud/llm/hash.js';\nimport { allProviders, providerEnvVar, selectProvider } from '../integrations/cloud/llm/select.js';\nimport type { LLMExtractResult, LLMProvider } from '../integrations/cloud/llm/types.js';\nimport { validateAgainstSchema } from '../integrations/cloud/llm/validate.js';\n\nconst MAX_HTML_BYTES = 50_000;\n\nexport interface LLMFallbackBudget {\n remaining: number;\n}\n\nexport interface LLMFallbackInput {\n html: string;\n jsonSchema: Record<string, unknown>;\n partial: Record<string, unknown>;\n missing: string[];\n signal?: AbortSignal;\n budget?: LLMFallbackBudget;\n}\n\nexport interface LLMFallbackResult extends LLMExtractResult {\n warnings: string[];\n}\n\nconst ADAPTERS: Record<\n LLMProvider,\n (\n opts: { prompt: string; jsonSchema: Record<string, unknown>; signal?: AbortSignal },\n apiKey: string,\n ) => Promise<LLMExtractResult>\n> = {\n anthropic: callAnthropic,\n openai: callOpenAI,\n gemini: callGemini,\n groq: callGroq,\n};\n\nexport async function extractWithLLM(\n input: LLMFallbackInput,\n): Promise<LLMFallbackResult> {\n if (input.missing.length === 0) {\n return emptyResult(input.partial, []);\n }\n\n const cfg = getConfig();\n const budget = input.budget ?? { remaining: cfg.llmMaxCallsPerRequest };\n if (budget.remaining <= 0) {\n return emptyResult(input.partial, [\n `LLM fallback skipped: per-request budget exhausted (cap ${cfg.llmMaxCallsPerRequest}). 
Override via WIGOLO_LLM_MAX_CALLS_PER_REQUEST.`,\n ]);\n }\n\n const provider = selectProvider(process.env);\n if (!provider) {\n const envList = allProviders()\n .map((p) => providerEnvVar(p))\n .join(', ');\n return emptyResult(input.partial, [\n `LLM fallback skipped: no provider key set (${envList}). ` +\n `${input.missing.length} required field(s) still missing: ${input.missing.join(', ')}.`,\n ]);\n }\n\n const apiKey = process.env[providerEnvVar(provider)] as string;\n const prompt = buildPrompt(input);\n const promptHash = hashPrompt(prompt);\n const schemaHash = hashSchema(input.jsonSchema);\n const modelId = `${provider}:default`;\n\n ensureLLMCacheTable();\n const cached = lookupLLMCache(modelId, promptHash, schemaHash);\n if (cached) {\n const values = JSON.parse(cached) as Record<string, unknown>;\n return {\n values: mergeOnlyMissing(input.partial, values, input.missing),\n provider,\n model: modelId,\n cached: true,\n latencyMs: 0,\n warnings: [],\n };\n }\n\n let result: LLMExtractResult;\n try {\n result = await ADAPTERS[provider](\n { prompt, jsonSchema: input.jsonSchema, signal: input.signal },\n apiKey,\n );\n } catch (e) {\n return emptyResult(input.partial, [\n `LLM fallback (${provider}) failed: ${(e as Error).message}`,\n ]);\n } finally {\n budget.remaining = Math.max(0, budget.remaining - 1);\n }\n\n const errors = validateAgainstSchema(result.values, input.jsonSchema);\n if (errors.length > 0) {\n return emptyResult(input.partial, [\n `LLM fallback (${provider}) response failed schema validation: ${errors\n .map((e) => `${e.path} ${e.message}`)\n .join('; ')}`,\n ]);\n }\n\n const ttlMs = cfg.llmCacheTtlDays * 24 * 60 * 60 * 1000;\n const now = Date.now();\n insertLLMCache({\n modelId,\n promptHash,\n schemaHash,\n response: JSON.stringify(result.values),\n createdAt: now,\n expiresAt: now + ttlMs,\n });\n\n return {\n values: mergeOnlyMissing(input.partial, result.values, input.missing),\n provider,\n model: result.model,\n cached: 
false,\n latencyMs: result.latencyMs,\n warnings: result.warnings ?? [],\n };\n}\n\nfunction emptyResult(\n partial: Record<string, unknown>,\n warnings: string[],\n): LLMFallbackResult {\n return {\n values: { ...partial },\n provider: 'anthropic',\n model: '',\n cached: false,\n latencyMs: 0,\n warnings,\n };\n}\n\nfunction mergeOnlyMissing(\n partial: Record<string, unknown>,\n filled: Record<string, unknown>,\n missing: string[],\n): Record<string, unknown> {\n const out = { ...partial };\n for (const key of missing) {\n if (filled[key] !== undefined) out[key] = filled[key];\n }\n return out;\n}\n\nfunction buildPrompt(input: LLMFallbackInput): string {\n const html = truncate(input.html, MAX_HTML_BYTES);\n return [\n 'Extract the following missing fields from the HTML below.',\n `Missing fields: ${input.missing.join(', ')}.`,\n 'Return JSON matching the provided schema. Do not invent values; if a field is not present in the HTML, omit it.',\n '',\n 'HTML:',\n html,\n ].join('\\n');\n}\n\nfunction truncate(s: string, maxBytes: number): string {\n if (s.length <= maxBytes) return s;\n return s.slice(0, 
maxBytes);\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,qBAAqB;AAC9B,SAAS,kBAAkB;AAC3B,SAAS,kBAAkB;AAC3B,SAAS,gBAAgB;AACzB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY,kBAAkB;AACvC,SAAS,cAAc,gBAAgB,sBAAsB;AAE7D,SAAS,6BAA6B;AAEtC,MAAM,iBAAiB;AAmBvB,MAAM,WAMF;AAAA,EACF,WAAW;AAAA,EACX,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,MAAM;AACR;AAEA,eAAsB,eACpB,OAC4B;AAC5B,MAAI,MAAM,QAAQ,WAAW,GAAG;AAC9B,WAAO,YAAY,MAAM,SAAS,CAAC,CAAC;AAAA,EACtC;AAEA,QAAM,MAAM,UAAU;AACtB,QAAM,SAAS,MAAM,UAAU,EAAE,WAAW,IAAI,sBAAsB;AACtE,MAAI,OAAO,aAAa,GAAG;AACzB,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,2DAA2D,IAAI,qBAAqB;AAAA,IACtF,CAAC;AAAA,EACH;AAEA,QAAM,WAAW,eAAe,QAAQ,GAAG;AAC3C,MAAI,CAAC,UAAU;AACb,UAAM,UAAU,aAAa,EAC1B,IAAI,CAAC,MAAM,eAAe,CAAC,CAAC,EAC5B,KAAK,IAAI;AACZ,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,8CAA8C,OAAO,MAChD,MAAM,QAAQ,MAAM,qCAAqC,MAAM,QAAQ,KAAK,IAAI,CAAC;AAAA,IACxF,CAAC;AAAA,EACH;AAEA,QAAM,SAAS,QAAQ,IAAI,eAAe,QAAQ,CAAC;AACnD,QAAM,SAAS,YAAY,KAAK;AAChC,QAAM,aAAa,WAAW,MAAM;AACpC,QAAM,aAAa,WAAW,MAAM,UAAU;AAC9C,QAAM,UAAU,GAAG,QAAQ;AAE3B,sBAAoB;AACpB,QAAM,SAAS,eAAe,SAAS,YAAY,UAAU;AAC7D,MAAI,QAAQ;AACV,UAAM,SAAS,KAAK,MAAM,MAAM;AAChC,WAAO;AAAA,MACL,QAAQ,iBAAiB,MAAM,SAAS,QAAQ,MAAM,OAAO;AAAA,MAC7D;AAAA,MACA,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,WAAW;AAAA,MACX,UAAU,CAAC;AAAA,IACb;AAAA,EACF;AAEA,MAAI;AACJ,MAAI;AACF,aAAS,MAAM,SAAS,QAAQ;AAAA,MAC9B,EAAE,QAAQ,YAAY,MAAM,YAAY,QAAQ,MAAM,OAAO;AAAA,MAC7D;AAAA,IACF;AAAA,EACF,SAAS,GAAG;AACV,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,iBAAiB,QAAQ,aAAc,EAAY,OAAO;AAAA,IAC5D,CAAC;AAAA,EACH,UAAE;AACA,WAAO,YAAY,KAAK,IAAI,GAAG,OAAO,YAAY,CAAC;AAAA,EACrD;AAEA,QAAM,SAAS,sBAAsB,OAAO,QAAQ,MAAM,UAAU;AACpE,MAAI,OAAO,SAAS,GAAG;AACrB,WAAO,YAAY,MAAM,SAAS;AAAA,MAChC,iBAAiB,QAAQ,wCAAwC,OAC9D,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,IAAI,EAAE,OAAO,EAAE,EACnC,KAAK,IAAI,CAAC;AAAA,IACf,CAAC;AAAA,EACH;AAEA,QAAM,QAAQ,IAAI,kBAAkB,KAAK,KAAK,KAAK;AACnD,QAAM,MAAM,KAAK,IAAI;AACrB,iBAAe;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,KAAK,UAAU,OAAO,MAAM;AAAA,IACtC,WAAW;AAAA,IACX,WAAW,MAAM;AAAA,EACnB,CAAC;AAED,SAAO;AAAA,IACL,QAAQ,iBAAi
B,MAAM,SAAS,OAAO,QAAQ,MAAM,OAAO;AAAA,IACpE;AAAA,IACA,OAAO,OAAO;AAAA,IACd,QAAQ;AAAA,IACR,WAAW,OAAO;AAAA,IAClB,UAAU,OAAO,YAAY,CAAC;AAAA,EAChC;AACF;AAEA,SAAS,YACP,SACA,UACmB;AACnB,SAAO;AAAA,IACL,QAAQ,EAAE,GAAG,QAAQ;AAAA,IACrB,UAAU;AAAA,IACV,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,WAAW;AAAA,IACX;AAAA,EACF;AACF;AAEA,SAAS,iBACP,SACA,QACA,SACyB;AACzB,QAAM,MAAM,EAAE,GAAG,QAAQ;AACzB,aAAW,OAAO,SAAS;AACzB,QAAI,OAAO,GAAG,MAAM,OAAW,KAAI,GAAG,IAAI,OAAO,GAAG;AAAA,EACtD;AACA,SAAO;AACT;AAEA,SAAS,YAAY,OAAiC;AACpD,QAAM,OAAO,SAAS,MAAM,MAAM,cAAc;AAChD,SAAO;AAAA,IACL;AAAA,IACA,mBAAmB,MAAM,QAAQ,KAAK,IAAI,CAAC;AAAA,IAC3C;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAEA,SAAS,SAAS,GAAW,UAA0B;AACrD,MAAI,EAAE,UAAU,SAAU,QAAO;AACjC,SAAO,EAAE,MAAM,GAAG,QAAQ;AAC5B;","names":[]}
@@ -0,0 +1,2 @@
1
+ export declare function sanitizeExtractedMarkdown(md: string): string;
2
+ //# sourceMappingURL=markdown-sanitize.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown-sanitize.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown-sanitize.ts"],"names":[],"mappings":"AAkFA,wBAAgB,yBAAyB,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CA6D5D"}
@@ -0,0 +1,151 @@
1
+ const STRAY_LABELS = /* @__PURE__ */ new Set([
2
+ "javascript",
3
+ "typescript",
4
+ "mjs",
5
+ "cjs",
6
+ "json",
7
+ "html",
8
+ "css",
9
+ "bash",
10
+ "sh",
11
+ "shell",
12
+ "python",
13
+ "py",
14
+ "go",
15
+ "rust",
16
+ "java",
17
+ "kotlin",
18
+ "swift",
19
+ "cpp",
20
+ "c++",
21
+ "csharp",
22
+ "ruby",
23
+ "php"
24
+ ]);
25
+ const GLUED_LANG_PREFIXES = {
26
+ ts: "ts",
27
+ js: "js",
28
+ tsx: "tsx",
29
+ jsx: "jsx",
30
+ py: "python",
31
+ rb: "ruby",
32
+ go: "go",
33
+ rs: "rust",
34
+ sh: "bash",
35
+ json: "json",
36
+ html: "html",
37
+ css: "css",
38
+ yaml: "yaml",
39
+ yml: "yaml",
40
+ toml: "toml",
41
+ md: "markdown"
42
+ };
43
+ const POST_PREFIX_TOKENS = [
44
+ "function",
45
+ "const",
46
+ "let",
47
+ "var",
48
+ "class",
49
+ "interface",
50
+ "type",
51
+ "enum",
52
+ "import",
53
+ "export",
54
+ "async",
55
+ "await",
56
+ "return",
57
+ "if",
58
+ "for",
59
+ "while",
60
+ "def",
61
+ "print",
62
+ "echo",
63
+ "package",
64
+ "public",
65
+ "private",
66
+ "protected",
67
+ "struct",
68
+ "fn",
69
+ "pub",
70
+ "use",
71
+ "mod"
72
+ ];
73
+ function unglueLangPrefix(line) {
74
+ for (const [prefix, lang] of Object.entries(GLUED_LANG_PREFIXES)) {
75
+ if (!line.startsWith(prefix)) continue;
76
+ const rest = line.slice(prefix.length);
77
+ for (const tok of POST_PREFIX_TOKENS) {
78
+ if (rest.startsWith(tok)) {
79
+ const next = rest.charAt(tok.length);
80
+ if (next === "" || /[\s({<\[]/.test(next)) {
81
+ return { lang, line: rest };
82
+ }
83
+ }
84
+ }
85
+ }
86
+ return null;
87
+ }
88
+ function isFenceLine(line) {
89
+ const m = line.match(/^(```+|~~~+)([a-zA-Z0-9_+-]*)\s*$/);
90
+ if (!m) return { open: false, close: false };
91
+ const lang = m[2]?.trim() || void 0;
92
+ return { open: !!lang, close: !lang, lang };
93
+ }
94
+ function sanitizeExtractedMarkdown(md) {
95
+ if (!md.includes("```") && !md.includes("~~~")) return md;
96
+ const lines = md.split("\n");
97
+ const out = [];
98
+ let inFence = false;
99
+ let fenceMarker = null;
100
+ let pendingFirstContentLine = false;
101
+ for (let i = 0; i < lines.length; i++) {
102
+ const line = lines[i];
103
+ if (!inFence) {
104
+ const f = line.match(/^(```+|~~~+)([a-zA-Z0-9_+-]*)\s*$/);
105
+ if (f) {
106
+ inFence = true;
107
+ fenceMarker = f[1];
108
+ pendingFirstContentLine = true;
109
+ const declaredLang = f[2];
110
+ if (declaredLang === "markdown") {
111
+ out.push(fenceMarker);
112
+ } else {
113
+ out.push(line);
114
+ }
115
+ continue;
116
+ }
117
+ out.push(line);
118
+ continue;
119
+ }
120
+ if (fenceMarker && line.startsWith(fenceMarker) && line.replace(fenceMarker, "").trim() === "") {
121
+ inFence = false;
122
+ fenceMarker = null;
123
+ pendingFirstContentLine = false;
124
+ out.push(line);
125
+ continue;
126
+ }
127
+ if (pendingFirstContentLine) {
128
+ pendingFirstContentLine = false;
129
+ const unglued = unglueLangPrefix(line);
130
+ if (unglued) {
131
+ const lastIdx = out.length - 1;
132
+ const prev = out[lastIdx];
133
+ if (prev.startsWith("```") || prev.startsWith("~~~")) {
134
+ out[lastIdx] = `${prev.match(/^(```+|~~~+)/)[1]}${unglued.lang ?? ""}`;
135
+ }
136
+ out.push(unglued.line);
137
+ continue;
138
+ }
139
+ }
140
+ const trimmed = line.trim();
141
+ if (trimmed && STRAY_LABELS.has(trimmed.toLowerCase()) && !line.includes(" ")) {
142
+ continue;
143
+ }
144
+ out.push(line);
145
+ }
146
+ return out.join("\n");
147
+ }
148
+ export {
149
+ sanitizeExtractedMarkdown
150
+ };
151
+ //# sourceMappingURL=markdown-sanitize.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/extraction/markdown-sanitize.ts"],"sourcesContent":["// Post-extraction sanitizer for markdown returned by ensemble extractors.\n// Currently targets a Node-docs pattern where tab labels (\"javascript\",\n// \"typescript\", etc.) leak into a fenced code block as a bare line.\n\nconst STRAY_LABELS = new Set([\n 'javascript',\n 'typescript',\n 'mjs',\n 'cjs',\n 'json',\n 'html',\n 'css',\n 'bash',\n 'sh',\n 'shell',\n 'python',\n 'py',\n 'go',\n 'rust',\n 'java',\n 'kotlin',\n 'swift',\n 'cpp',\n 'c++',\n 'csharp',\n 'ruby',\n 'php',\n]);\n\n// Short aliases that can appear *glued* to the first identifier on the first\n// line of a fenced block (e.g. the TypeScript docs render `<span>ts</span>` and\n// the next token concatenates without a separator → `tsfunction`, `jsconst`).\nconst GLUED_LANG_PREFIXES: Record<string, string> = {\n ts: 'ts',\n js: 'js',\n tsx: 'tsx',\n jsx: 'jsx',\n py: 'python',\n rb: 'ruby',\n go: 'go',\n rs: 'rust',\n sh: 'bash',\n json: 'json',\n html: 'html',\n css: 'css',\n yaml: 'yaml',\n yml: 'yaml',\n toml: 'toml',\n md: 'markdown',\n};\n\n// Identifier keywords that commonly follow a stuck language prefix.\nconst POST_PREFIX_TOKENS = [\n 'function', 'const', 'let', 'var', 'class', 'interface', 'type', 'enum',\n 'import', 'export', 'async', 'await', 'return', 'if', 'for', 'while',\n 'def', 'print', 'echo', 'package', 'public', 'private', 'protected',\n 'struct', 'fn', 'pub', 'use', 'mod',\n];\n\nfunction unglueLangPrefix(line: string): { lang?: string; line: string } | null {\n for (const [prefix, lang] of Object.entries(GLUED_LANG_PREFIXES)) {\n if (!line.startsWith(prefix)) continue;\n const rest = line.slice(prefix.length);\n for (const tok of POST_PREFIX_TOKENS) {\n if (rest.startsWith(tok)) {\n const next = rest.charAt(tok.length);\n if (next === '' || /[\\s({<\\[]/.test(next)) {\n return { lang, line: rest };\n }\n }\n }\n }\n return null;\n}\n\nfunction isFenceLine(line: string): { open: boolean; 
close: boolean; lang?: string } {\n const m = line.match(/^(```+|~~~+)([a-zA-Z0-9_+-]*)\\s*$/);\n if (!m) return { open: false, close: false };\n const lang = m[2]?.trim() || undefined;\n return { open: !!lang, close: !lang, lang };\n}\n\nexport function sanitizeExtractedMarkdown(md: string): string {\n if (!md.includes('```') && !md.includes('~~~')) return md;\n const lines = md.split('\\n');\n const out: string[] = [];\n let inFence = false;\n let fenceMarker: string | null = null;\n let pendingFirstContentLine = false;\n\n for (let i = 0; i < lines.length; i++) {\n const line = lines[i];\n if (!inFence) {\n const f = line.match(/^(```+|~~~+)([a-zA-Z0-9_+-]*)\\s*$/);\n if (f) {\n inFence = true;\n fenceMarker = f[1];\n pendingFirstContentLine = true;\n const declaredLang = f[2];\n // 'markdown' is a sentinel many extractors emit when the source code\n // tag didn't carry a language class. Reset it; we'll try to recover\n // the real lang from the first content line below.\n if (declaredLang === 'markdown') {\n out.push(fenceMarker);\n } else {\n out.push(line);\n }\n continue;\n }\n out.push(line);\n continue;\n }\n // inside a fence\n if (fenceMarker && line.startsWith(fenceMarker) && line.replace(fenceMarker, '').trim() === '') {\n inFence = false;\n fenceMarker = null;\n pendingFirstContentLine = false;\n out.push(line);\n continue;\n }\n if (pendingFirstContentLine) {\n pendingFirstContentLine = false;\n const unglued = unglueLangPrefix(line);\n if (unglued) {\n // Replace the most recently pushed fence-open line with one that\n // carries the recovered language tag.\n const lastIdx = out.length - 1;\n const prev = out[lastIdx];\n if (prev.startsWith('```') || prev.startsWith('~~~')) {\n out[lastIdx] = `${prev.match(/^(```+|~~~+)/)![1]}${unglued.lang ?? 
''}`;\n }\n out.push(unglued.line);\n continue;\n }\n }\n const trimmed = line.trim();\n if (trimmed && STRAY_LABELS.has(trimmed.toLowerCase()) && !line.includes(' ')) {\n // Drop the stray language label line.\n continue;\n }\n out.push(line);\n }\n return out.join('\\n');\n}\n"],"mappings":"AAIA,MAAM,eAAe,oBAAI,IAAI;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAKD,MAAM,sBAA8C;AAAA,EAClD,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,KAAK;AAAA,EACL,KAAK;AAAA,EACL,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,IAAI;AACN;AAGA,MAAM,qBAAqB;AAAA,EACzB;AAAA,EAAY;AAAA,EAAS;AAAA,EAAO;AAAA,EAAO;AAAA,EAAS;AAAA,EAAa;AAAA,EAAQ;AAAA,EACjE;AAAA,EAAU;AAAA,EAAU;AAAA,EAAS;AAAA,EAAS;AAAA,EAAU;AAAA,EAAM;AAAA,EAAO;AAAA,EAC7D;AAAA,EAAO;AAAA,EAAS;AAAA,EAAQ;AAAA,EAAW;AAAA,EAAU;AAAA,EAAW;AAAA,EACxD;AAAA,EAAU;AAAA,EAAM;AAAA,EAAO;AAAA,EAAO;AAChC;AAEA,SAAS,iBAAiB,MAAsD;AAC9E,aAAW,CAAC,QAAQ,IAAI,KAAK,OAAO,QAAQ,mBAAmB,GAAG;AAChE,QAAI,CAAC,KAAK,WAAW,MAAM,EAAG;AAC9B,UAAM,OAAO,KAAK,MAAM,OAAO,MAAM;AACrC,eAAW,OAAO,oBAAoB;AACpC,UAAI,KAAK,WAAW,GAAG,GAAG;AACxB,cAAM,OAAO,KAAK,OAAO,IAAI,MAAM;AACnC,YAAI,SAAS,MAAM,YAAY,KAAK,IAAI,GAAG;AACzC,iBAAO,EAAE,MAAM,MAAM,KAAK;AAAA,QAC5B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,YAAY,MAAgE;AACnF,QAAM,IAAI,KAAK,MAAM,mCAAmC;AACxD,MAAI,CAAC,EAAG,QAAO,EAAE,MAAM,OAAO,OAAO,MAAM;AAC3C,QAAM,OAAO,EAAE,CAAC,GAAG,KAAK,KAAK;AAC7B,SAAO,EAAE,MAAM,CAAC,CAAC,MAAM,OAAO,CAAC,MAAM,KAAK;AAC5C;AAEO,SAAS,0BAA0B,IAAoB;AAC5D,MAAI,CAAC,GAAG,SAAS,KAAK,KAAK,CAAC,GAAG,SAAS,KAAK,EAAG,QAAO;AACvD,QAAM,QAAQ,GAAG,MAAM,IAAI;AAC3B,QAAM,MAAgB,CAAC;AACvB,MAAI,UAAU;AACd,MAAI,cAA6B;AACjC,MAAI,0BAA0B;AAE9B,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,OAAO,MAAM,CAAC;AACpB,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI,KAAK,MAAM
,mCAAmC;AACxD,UAAI,GAAG;AACL,kBAAU;AACV,sBAAc,EAAE,CAAC;AACjB,kCAA0B;AAC1B,cAAM,eAAe,EAAE,CAAC;AAIxB,YAAI,iBAAiB,YAAY;AAC/B,cAAI,KAAK,WAAW;AAAA,QACtB,OAAO;AACL,cAAI,KAAK,IAAI;AAAA,QACf;AACA;AAAA,MACF;AACA,UAAI,KAAK,IAAI;AACb;AAAA,IACF;AAEA,QAAI,eAAe,KAAK,WAAW,WAAW,KAAK,KAAK,QAAQ,aAAa,EAAE,EAAE,KAAK,MAAM,IAAI;AAC9F,gBAAU;AACV,oBAAc;AACd,gCAA0B;AAC1B,UAAI,KAAK,IAAI;AACb;AAAA,IACF;AACA,QAAI,yBAAyB;AAC3B,gCAA0B;AAC1B,YAAM,UAAU,iBAAiB,IAAI;AACrC,UAAI,SAAS;AAGX,cAAM,UAAU,IAAI,SAAS;AAC7B,cAAM,OAAO,IAAI,OAAO;AACxB,YAAI,KAAK,WAAW,KAAK,KAAK,KAAK,WAAW,KAAK,GAAG;AACpD,cAAI,OAAO,IAAI,GAAG,KAAK,MAAM,cAAc,EAAG,CAAC,CAAC,GAAG,QAAQ,QAAQ,EAAE;AAAA,QACvE;AACA,YAAI,KAAK,QAAQ,IAAI;AACrB;AAAA,MACF;AAAA,IACF;AACA,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,WAAW,aAAa,IAAI,QAAQ,YAAY,CAAC,KAAK,CAAC,KAAK,SAAS,GAAG,GAAG;AAE7E;AAAA,IACF;AACA,QAAI,KAAK,IAAI;AAAA,EACf;AACA,SAAO,IAAI,KAAK,IAAI;AACtB;","names":[]}
@@ -1,4 +1,13 @@
1
+ import TurndownService from 'turndown';
2
+ export declare function buildTurndown(): TurndownService;
1
3
  export declare function htmlToMarkdown(html: string): string;
4
+ export interface Heading {
5
+ level: number;
6
+ text: string;
7
+ lineIndex: number;
8
+ }
9
+ export declare function parseHeadings(lines: string[]): Heading[];
10
+ export declare function lineStartCharOffsets(lines: string[]): number[];
2
11
  export declare function extractSection(markdown: string, section: string, sectionIndex?: number): {
3
12
  content: string;
4
13
  matched: boolean;
@@ -7,4 +16,6 @@ export declare function extractLinksAndImages(markdown: string): {
7
16
  links: string[];
8
17
  images: string[];
9
18
  };
19
+ export declare function filterDecorativeImages(markdown: string): string;
20
+ export declare function resolveRelativeUrls(markdown: string, baseUrl: string): string;
10
21
  //# sourceMappingURL=markdown.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F"}
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAiBvC,wBAAgB,aAAa,IAAI,eAAe,CA2D/C;AAID,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,EAAE,CASxD;AAID,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQ9D;AAkBD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAyC7E"}
@@ -1,107 +1,211 @@
1
- import TurndownService from 'turndown';
1
+ import TurndownService from "turndown";
2
+ import { detectCodeLanguage } from "./lang-hints.js";
3
+ function longestBacktickRun(s) {
4
+ let max = 0;
5
+ let cur = 0;
6
+ for (let i = 0; i < s.length; i++) {
7
+ if (s.charCodeAt(i) === 96) {
8
+ cur++;
9
+ if (cur > max) max = cur;
10
+ } else {
11
+ cur = 0;
12
+ }
13
+ }
14
+ return max;
15
+ }
2
16
  function buildTurndown() {
3
- const td = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
4
- // Remove script and style tags entirely
5
- td.remove(['script', 'style']);
6
- // Custom rule: convert <table> to markdown table
7
- td.addRule('table', {
8
- filter: 'table',
9
- replacement(_content, node) {
10
- const el = node;
11
- const rows = Array.from(el.querySelectorAll('tr'));
12
- if (rows.length === 0)
13
- return '';
14
- const renderRow = (row) => {
15
- const cells = Array.from(row.querySelectorAll('th, td'));
16
- return '| ' + cells.map(c => c.textContent?.replace(/\n/g, ' ').trim() ?? '').join(' | ') + ' |';
17
- };
18
- const headerRow = rows[0];
19
- const isHeaderRow = headerRow.querySelectorAll('th').length > 0;
20
- const headerCells = Array.from(headerRow.querySelectorAll('th, td'));
21
- const separator = '| ' + headerCells.map(() => '---').join(' | ') + ' |';
22
- if (isHeaderRow) {
23
- const bodyRows = rows.slice(1);
24
- const lines = [renderRow(headerRow), separator, ...bodyRows.map(renderRow)];
25
- return '\n\n' + lines.join('\n') + '\n\n';
26
- }
27
- const lines = [renderRow(headerRow), separator, ...rows.slice(1).map(renderRow)];
28
- return '\n\n' + lines.join('\n') + '\n\n';
29
- },
30
- });
31
- // Suppress thead/tbody/tr/th/td individually since table rule handles the whole node
32
- td.addRule('tableCell', {
33
- filter: ['thead', 'tbody', 'tfoot', 'tr', 'th', 'td'],
34
- replacement(content) {
35
- return content;
36
- },
37
- });
38
- return td;
17
+ const td = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
18
+ td.remove(["script", "style"]);
19
+ td.addRule("table", {
20
+ filter: "table",
21
+ replacement(_content, node) {
22
+ const el = node;
23
+ const rows = Array.from(el.querySelectorAll("tr"));
24
+ if (rows.length === 0) return "";
25
+ const renderRow = (row) => {
26
+ const cells = Array.from(row.querySelectorAll("th, td"));
27
+ return "| " + cells.map((c) => c.textContent?.replace(/\n/g, " ").trim() ?? "").join(" | ") + " |";
28
+ };
29
+ const headerRow = rows[0];
30
+ const isHeaderRow = headerRow.querySelectorAll("th").length > 0;
31
+ const headerCells = Array.from(headerRow.querySelectorAll("th, td"));
32
+ const separator = "| " + headerCells.map(() => "---").join(" | ") + " |";
33
+ if (isHeaderRow) {
34
+ const bodyRows = rows.slice(1);
35
+ const lines2 = [renderRow(headerRow), separator, ...bodyRows.map(renderRow)];
36
+ return "\n\n" + lines2.join("\n") + "\n\n";
37
+ }
38
+ const lines = [renderRow(headerRow), separator, ...rows.slice(1).map(renderRow)];
39
+ return "\n\n" + lines.join("\n") + "\n\n";
40
+ }
41
+ });
42
+ td.addRule("tableCell", {
43
+ filter: ["thead", "tbody", "tfoot", "tr", "th", "td"],
44
+ replacement(content) {
45
+ return content;
46
+ }
47
+ });
48
+ td.addRule("codeBlockLang", {
49
+ filter(node) {
50
+ return node.nodeName === "PRE" && node.querySelector("code") !== null;
51
+ },
52
+ replacement(_content, node) {
53
+ const pre = node;
54
+ const code = pre.querySelector("code");
55
+ const cls = code?.getAttribute("class") ?? pre.getAttribute("class") ?? "";
56
+ const lang = detectCodeLanguage(cls);
57
+ const body = code?.textContent ?? pre.textContent ?? "";
58
+ const fence = "`".repeat(Math.max(3, longestBacktickRun(body) + 1));
59
+ return `
60
+
61
+ ${fence}${lang ?? ""}
62
+ ${body.replace(/\n+$/, "")}
63
+ ${fence}
64
+
65
+ `;
66
+ }
67
+ });
68
+ return td;
39
69
  }
40
70
  const turndown = buildTurndown();
41
- export function htmlToMarkdown(html) {
42
- if (!html)
43
- return '';
44
- return turndown.turndown(html);
71
+ function htmlToMarkdown(html) {
72
+ if (!html) return "";
73
+ return turndown.turndown(html);
45
74
  }
46
75
  function parseHeadings(lines) {
47
- const headings = [];
48
- for (let i = 0; i < lines.length; i++) {
49
- const match = lines[i].match(/^(#{1,6})\s+(.+)/);
50
- if (match) {
51
- headings.push({ level: match[1].length, text: match[2].trim(), lineIndex: i });
52
- }
76
+ const headings = [];
77
+ for (let i = 0; i < lines.length; i++) {
78
+ const match = lines[i].match(/^(#{1,6})\s+(.+)/);
79
+ if (match) {
80
+ headings.push({ level: match[1].length, text: match[2].trim(), lineIndex: i });
53
81
  }
54
- return headings;
82
+ }
83
+ return headings;
84
+ }
85
+ function lineStartCharOffsets(lines) {
86
+ const offsets = new Array(lines.length);
87
+ let acc = 0;
88
+ for (let i = 0; i < lines.length; i++) {
89
+ offsets[i] = acc;
90
+ acc += lines[i].length + 1;
91
+ }
92
+ return offsets;
55
93
  }
56
94
  function extractFromHeading(lines, headings, headingIdx) {
57
- const heading = headings[headingIdx];
58
- const start = heading.lineIndex;
59
- // Find the next heading of equal or higher level (lower or equal # count)
60
- let end = lines.length;
61
- for (let i = headingIdx + 1; i < headings.length; i++) {
62
- if (headings[i].level <= heading.level) {
63
- end = headings[i].lineIndex;
64
- break;
65
- }
95
+ const heading = headings[headingIdx];
96
+ const start = heading.lineIndex;
97
+ let end = lines.length;
98
+ for (let i = headingIdx + 1; i < headings.length; i++) {
99
+ if (headings[i].level <= heading.level) {
100
+ end = headings[i].lineIndex;
101
+ break;
66
102
  }
67
- return lines.slice(start, end).join('\n');
103
+ }
104
+ return lines.slice(start, end).join("\n");
68
105
  }
69
- export function extractSection(markdown, section, sectionIndex = 0) {
70
- const lines = markdown.split('\n');
71
- const headings = parseHeadings(lines);
72
- if (headings.length === 0)
73
- return { content: markdown, matched: false };
74
- const lower = section.toLowerCase();
75
- const indexed = headings.map((h, i) => ({ h, i }));
76
- // Collect exact matches first
77
- const exactMatches = indexed.filter(({ h }) => h.text.toLowerCase() === lower);
78
- // If exact matches satisfy the requested index, use them
79
- if (exactMatches.length > 0 && sectionIndex < exactMatches.length) {
80
- const { i } = exactMatches[sectionIndex];
81
- return { content: extractFromHeading(lines, headings, i), matched: true };
82
- }
83
- // Fall back to substring matches (includes exact headings and partial ones)
84
- const substringMatches = indexed.filter(({ h }) => h.text.toLowerCase().includes(lower));
85
- if (substringMatches.length === 0 || sectionIndex >= substringMatches.length) {
86
- return { content: markdown, matched: false };
106
+ function extractSection(markdown, section, sectionIndex = 0) {
107
+ const lines = markdown.split("\n");
108
+ const headings = parseHeadings(lines);
109
+ if (headings.length === 0) return { content: markdown, matched: false };
110
+ const lower = section.toLowerCase();
111
+ const indexed = headings.map((h, i2) => ({ h, i: i2 }));
112
+ const exactMatches = indexed.filter(({ h }) => h.text.toLowerCase() === lower);
113
+ if (exactMatches.length > 0 && sectionIndex < exactMatches.length) {
114
+ const { i: i2 } = exactMatches[sectionIndex];
115
+ return { content: extractFromHeading(lines, headings, i2), matched: true };
116
+ }
117
+ const substringMatches = indexed.filter(({ h }) => h.text.toLowerCase().includes(lower));
118
+ if (substringMatches.length === 0 || sectionIndex >= substringMatches.length) {
119
+ return { content: markdown, matched: false };
120
+ }
121
+ const { i } = substringMatches[sectionIndex];
122
+ return { content: extractFromHeading(lines, headings, i), matched: true };
123
+ }
124
+ function extractLinksAndImages(markdown) {
125
+ const imagePattern = /!\[[^\]]*\]\(([^)]+)\)/g;
126
+ const linkPattern = /(?<!!)\[[^\]]*\]\(([^)]+)\)/g;
127
+ const images = /* @__PURE__ */ new Set();
128
+ const links = /* @__PURE__ */ new Set();
129
+ let match;
130
+ while ((match = imagePattern.exec(markdown)) !== null) {
131
+ images.add(match[1]);
132
+ }
133
+ while ((match = linkPattern.exec(markdown)) !== null) {
134
+ links.add(match[1]);
135
+ }
136
+ return { links: Array.from(links), images: Array.from(images) };
137
+ }
138
+ const DECORATIVE_URL_MARKERS = [
139
+ "avatar",
140
+ "icon",
141
+ "logo",
142
+ "badge",
143
+ "shield",
144
+ "tracking",
145
+ "pixel",
146
+ "sprite",
147
+ "emoji",
148
+ "favicon"
149
+ ];
150
+ function filterDecorativeImages(markdown) {
151
+ if (!markdown) return markdown;
152
+ return markdown.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, src) => {
153
+ const trimmedAlt = alt.trim();
154
+ const lowerSrc = src.toLowerCase();
155
+ if (lowerSrc.startsWith("data:image/gif;base64,")) return "";
156
+ if (lowerSrc.startsWith("data:image/svg+xml") && src.length < 200) return "";
157
+ for (const marker of DECORATIVE_URL_MARKERS) {
158
+ if (lowerSrc.includes(marker)) return "";
87
159
  }
88
- const { i } = substringMatches[sectionIndex];
89
- return { content: extractFromHeading(lines, headings, i), matched: true };
160
+ if (!trimmedAlt) return "";
161
+ return match;
162
+ });
90
163
  }
91
- export function extractLinksAndImages(markdown) {
92
- const imagePattern = /!\[[^\]]*\]\(([^)]+)\)/g;
93
- const linkPattern = /(?<!!)\[[^\]]*\]\(([^)]+)\)/g;
94
- const images = new Set();
95
- const links = new Set();
96
- let match;
97
- // Extract images first
98
- while ((match = imagePattern.exec(markdown)) !== null) {
99
- images.add(match[1]);
164
+ function resolveRelativeUrls(markdown, baseUrl) {
165
+ if (!markdown || !baseUrl) return markdown;
166
+ const rewrite = (path) => {
167
+ const trimmed = path.trim();
168
+ if (!trimmed) return path;
169
+ if (/^(?:https?:|mailto:|tel:|javascript:|data:)/i.test(trimmed)) return path;
170
+ if (trimmed.startsWith("#")) {
171
+ try {
172
+ return new URL(trimmed, baseUrl).href;
173
+ } catch {
174
+ return path;
175
+ }
176
+ }
177
+ if (trimmed.startsWith("//")) {
178
+ try {
179
+ const base = new URL(baseUrl);
180
+ return `${base.protocol}${trimmed}`;
181
+ } catch {
182
+ return path;
183
+ }
100
184
  }
101
- // Extract links (non-image)
102
- while ((match = linkPattern.exec(markdown)) !== null) {
103
- links.add(match[1]);
185
+ try {
186
+ return new URL(trimmed, baseUrl).href;
187
+ } catch {
188
+ return path;
104
189
  }
105
- return { links: Array.from(links), images: Array.from(images) };
190
+ };
191
+ let result = markdown.replace(
192
+ /(!\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g,
193
+ (_m, open, path, close) => `${open}${rewrite(path)}${close}`
194
+ );
195
+ result = result.replace(
196
+ /(^|[^!])(\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g,
197
+ (_m, pre, open, path, close) => `${pre}${open}${rewrite(path)}${close}`
198
+ );
199
+ return result;
106
200
  }
201
+ export {
202
+ buildTurndown,
203
+ extractLinksAndImages,
204
+ extractSection,
205
+ filterDecorativeImages,
206
+ htmlToMarkdown,
207
+ lineStartCharOffsets,
208
+ parseHeadings,
209
+ resolveRelativeUrls
210
+ };
107
211
  //# sourceMappingURL=markdown.js.map