@staticn0va/wigolo 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1003)
  1. package/LICENSE +1 -1
  2. package/README.md +146 -227
  3. package/SKILL.md +382 -0
  4. package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
  5. package/assets/blocks/claude-code/wigolo-command.md +40 -0
  6. package/assets/blocks/cursor/wigolo.mdc +46 -0
  7. package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
  8. package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
  9. package/assets/skills/wigolo/SKILL.md +50 -0
  10. package/assets/skills/wigolo/rules/cache-first.md +30 -0
  11. package/assets/skills/wigolo/rules/synthesis.md +43 -0
  12. package/assets/skills/wigolo-agent/SKILL.md +73 -0
  13. package/assets/skills/wigolo-crawl/SKILL.md +60 -0
  14. package/assets/skills/wigolo-extract/SKILL.md +59 -0
  15. package/assets/skills/wigolo-fetch/SKILL.md +65 -0
  16. package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
  17. package/assets/skills/wigolo-research/SKILL.md +77 -0
  18. package/assets/skills/wigolo-search/SKILL.md +78 -0
  19. package/dist/agent/executor.d.ts +33 -0
  20. package/dist/agent/executor.d.ts.map +1 -0
  21. package/dist/agent/executor.js +233 -0
  22. package/dist/agent/executor.js.map +1 -0
  23. package/dist/agent/pipeline.d.ts +5 -0
  24. package/dist/agent/pipeline.d.ts.map +1 -0
  25. package/dist/agent/pipeline.js +238 -0
  26. package/dist/agent/pipeline.js.map +1 -0
  27. package/dist/agent/planner.d.ts +13 -0
  28. package/dist/agent/planner.d.ts.map +1 -0
  29. package/dist/agent/planner.js +271 -0
  30. package/dist/agent/planner.js.map +1 -0
  31. package/dist/agent/relevance.d.ts +15 -0
  32. package/dist/agent/relevance.d.ts.map +1 -0
  33. package/dist/agent/relevance.js +60 -0
  34. package/dist/agent/relevance.js.map +1 -0
  35. package/dist/cache/backfill-embeddings.d.ts +23 -0
  36. package/dist/cache/backfill-embeddings.d.ts.map +1 -0
  37. package/dist/cache/backfill-embeddings.js +105 -0
  38. package/dist/cache/backfill-embeddings.js.map +1 -0
  39. package/dist/cache/change-detector.d.ts +7 -0
  40. package/dist/cache/change-detector.d.ts.map +1 -0
  41. package/dist/cache/change-detector.js +43 -0
  42. package/dist/cache/change-detector.js.map +1 -0
  43. package/dist/cache/db.d.ts +1 -0
  44. package/dist/cache/db.d.ts.map +1 -1
  45. package/dist/cache/db.js +94 -22
  46. package/dist/cache/db.js.map +1 -1
  47. package/dist/cache/diff-summary.d.ts +2 -0
  48. package/dist/cache/diff-summary.d.ts.map +1 -0
  49. package/dist/cache/diff-summary.js +82 -0
  50. package/dist/cache/diff-summary.js.map +1 -0
  51. package/dist/cache/migrations/runner.d.ts +29 -0
  52. package/dist/cache/migrations/runner.d.ts.map +1 -0
  53. package/dist/cache/migrations/runner.js +147 -0
  54. package/dist/cache/migrations/runner.js.map +1 -0
  55. package/dist/cache/sqlite-vec-store.d.ts +42 -0
  56. package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
  57. package/dist/cache/sqlite-vec-store.js +176 -0
  58. package/dist/cache/sqlite-vec-store.js.map +1 -0
  59. package/dist/cache/store.d.ts +47 -1
  60. package/dist/cache/store.d.ts.map +1 -1
  61. package/dist/cache/store.js +364 -168
  62. package/dist/cache/store.js.map +1 -1
  63. package/dist/cli/agents/antigravity.d.ts +20 -0
  64. package/dist/cli/agents/antigravity.d.ts.map +1 -0
  65. package/dist/cli/agents/antigravity.js +49 -0
  66. package/dist/cli/agents/antigravity.js.map +1 -0
  67. package/dist/cli/agents/claude-code.d.ts +25 -0
  68. package/dist/cli/agents/claude-code.d.ts.map +1 -0
  69. package/dist/cli/agents/claude-code.js +111 -0
  70. package/dist/cli/agents/claude-code.js.map +1 -0
  71. package/dist/cli/agents/cursor.d.ts +21 -0
  72. package/dist/cli/agents/cursor.d.ts.map +1 -0
  73. package/dist/cli/agents/cursor.js +58 -0
  74. package/dist/cli/agents/cursor.js.map +1 -0
  75. package/dist/cli/agents/gemini-cli.d.ts +21 -0
  76. package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
  77. package/dist/cli/agents/gemini-cli.js +55 -0
  78. package/dist/cli/agents/gemini-cli.js.map +1 -0
  79. package/dist/cli/agents/registry.d.ts +21 -0
  80. package/dist/cli/agents/registry.d.ts.map +1 -0
  81. package/dist/cli/agents/registry.js +27 -0
  82. package/dist/cli/agents/registry.js.map +1 -0
  83. package/dist/cli/agents/utils.d.ts +26 -0
  84. package/dist/cli/agents/utils.d.ts.map +1 -0
  85. package/dist/cli/agents/utils.js +136 -0
  86. package/dist/cli/agents/utils.js.map +1 -0
  87. package/dist/cli/agents/vscode.d.ts +21 -0
  88. package/dist/cli/agents/vscode.d.ts.map +1 -0
  89. package/dist/cli/agents/vscode.js +62 -0
  90. package/dist/cli/agents/vscode.js.map +1 -0
  91. package/dist/cli/auth.d.ts +2 -0
  92. package/dist/cli/auth.d.ts.map +1 -0
  93. package/dist/cli/auth.js +94 -0
  94. package/dist/cli/auth.js.map +1 -0
  95. package/dist/cli/backfill.d.ts +2 -0
  96. package/dist/cli/backfill.d.ts.map +1 -0
  97. package/dist/cli/backfill.js +58 -0
  98. package/dist/cli/backfill.js.map +1 -0
  99. package/dist/cli/daemon.d.ts +6 -1
  100. package/dist/cli/daemon.d.ts.map +1 -1
  101. package/dist/cli/daemon.js +61 -3
  102. package/dist/cli/daemon.js.map +1 -1
  103. package/dist/cli/doctor.d.ts +8 -0
  104. package/dist/cli/doctor.d.ts.map +1 -0
  105. package/dist/cli/doctor.js +344 -0
  106. package/dist/cli/doctor.js.map +1 -0
  107. package/dist/cli/health.d.ts +1 -1
  108. package/dist/cli/health.d.ts.map +1 -1
  109. package/dist/cli/health.js +42 -3
  110. package/dist/cli/health.js.map +1 -1
  111. package/dist/cli/help.d.ts +6 -0
  112. package/dist/cli/help.d.ts.map +1 -0
  113. package/dist/cli/help.js +63 -0
  114. package/dist/cli/help.js.map +1 -0
  115. package/dist/cli/index.d.ts +1 -1
  116. package/dist/cli/index.d.ts.map +1 -1
  117. package/dist/cli/index.js +35 -7
  118. package/dist/cli/index.js.map +1 -1
  119. package/dist/cli/init.d.ts +2 -0
  120. package/dist/cli/init.d.ts.map +1 -0
  121. package/dist/cli/init.js +201 -0
  122. package/dist/cli/init.js.map +1 -0
  123. package/dist/cli/plugin.d.ts +5 -0
  124. package/dist/cli/plugin.d.ts.map +1 -0
  125. package/dist/cli/plugin.js +185 -0
  126. package/dist/cli/plugin.js.map +1 -0
  127. package/dist/cli/setup-mcp.d.ts +2 -0
  128. package/dist/cli/setup-mcp.d.ts.map +1 -0
  129. package/dist/cli/setup-mcp.js +114 -0
  130. package/dist/cli/setup-mcp.js.map +1 -0
  131. package/dist/cli/shell.d.ts +2 -0
  132. package/dist/cli/shell.d.ts.map +1 -0
  133. package/dist/cli/shell.js +86 -0
  134. package/dist/cli/shell.js.map +1 -0
  135. package/dist/cli/shutdown.d.ts +2 -0
  136. package/dist/cli/shutdown.d.ts.map +1 -0
  137. package/dist/cli/shutdown.js +26 -0
  138. package/dist/cli/shutdown.js.map +1 -0
  139. package/dist/cli/status.d.ts +2 -0
  140. package/dist/cli/status.d.ts.map +1 -0
  141. package/dist/cli/status.js +31 -0
  142. package/dist/cli/status.js.map +1 -0
  143. package/dist/cli/telemetry.d.ts +10 -0
  144. package/dist/cli/telemetry.d.ts.map +1 -0
  145. package/dist/cli/telemetry.js +56 -0
  146. package/dist/cli/telemetry.js.map +1 -0
  147. package/dist/cli/tui/agents-types.d.ts +28 -0
  148. package/dist/cli/tui/agents-types.d.ts.map +1 -0
  149. package/dist/cli/tui/agents-types.js +1 -0
  150. package/dist/cli/tui/agents-types.js.map +1 -0
  151. package/dist/cli/tui/agents.d.ts +11 -0
  152. package/dist/cli/tui/agents.d.ts.map +1 -0
  153. package/dist/cli/tui/agents.js +93 -0
  154. package/dist/cli/tui/agents.js.map +1 -0
  155. package/dist/cli/tui/banner.d.ts +3 -0
  156. package/dist/cli/tui/banner.d.ts.map +1 -0
  157. package/dist/cli/tui/banner.js +30 -0
  158. package/dist/cli/tui/banner.js.map +1 -0
  159. package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
  160. package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
  161. package/dist/cli/tui/components/AgentSelect.js +116 -0
  162. package/dist/cli/tui/components/AgentSelect.js.map +1 -0
  163. package/dist/cli/tui/components/Banner.d.ts +6 -0
  164. package/dist/cli/tui/components/Banner.d.ts.map +1 -0
  165. package/dist/cli/tui/components/Banner.js +25 -0
  166. package/dist/cli/tui/components/Banner.js.map +1 -0
  167. package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
  168. package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
  169. package/dist/cli/tui/components/BrowserSelect.js +19 -0
  170. package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
  171. package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
  172. package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
  173. package/dist/cli/tui/components/InstallProgress.js +67 -0
  174. package/dist/cli/tui/components/InstallProgress.js.map +1 -0
  175. package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
  176. package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
  177. package/dist/cli/tui/components/SkillInstall.js +94 -0
  178. package/dist/cli/tui/components/SkillInstall.js.map +1 -0
  179. package/dist/cli/tui/components/Summary.d.ts +22 -0
  180. package/dist/cli/tui/components/Summary.d.ts.map +1 -0
  181. package/dist/cli/tui/components/Summary.js +135 -0
  182. package/dist/cli/tui/components/Summary.js.map +1 -0
  183. package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
  184. package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
  185. package/dist/cli/tui/components/SystemCheck.js +71 -0
  186. package/dist/cli/tui/components/SystemCheck.js.map +1 -0
  187. package/dist/cli/tui/components/Verification.d.ts +8 -0
  188. package/dist/cli/tui/components/Verification.d.ts.map +1 -0
  189. package/dist/cli/tui/components/Verification.js +63 -0
  190. package/dist/cli/tui/components/Verification.js.map +1 -0
  191. package/dist/cli/tui/config-writer-cli.d.ts +12 -0
  192. package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
  193. package/dist/cli/tui/config-writer-cli.js +39 -0
  194. package/dist/cli/tui/config-writer-cli.js.map +1 -0
  195. package/dist/cli/tui/config-writer-json.d.ts +16 -0
  196. package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
  197. package/dist/cli/tui/config-writer-json.js +86 -0
  198. package/dist/cli/tui/config-writer-json.js.map +1 -0
  199. package/dist/cli/tui/config-writer-toml.d.ts +16 -0
  200. package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
  201. package/dist/cli/tui/config-writer-toml.js +83 -0
  202. package/dist/cli/tui/config-writer-toml.js.map +1 -0
  203. package/dist/cli/tui/config-writer.d.ts +25 -0
  204. package/dist/cli/tui/config-writer.d.ts.map +1 -0
  205. package/dist/cli/tui/config-writer.js +101 -0
  206. package/dist/cli/tui/config-writer.js.map +1 -0
  207. package/dist/cli/tui/detect-helpers.d.ts +6 -0
  208. package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
  209. package/dist/cli/tui/detect-helpers.js +45 -0
  210. package/dist/cli/tui/detect-helpers.js.map +1 -0
  211. package/dist/cli/tui/extras-prompt.d.ts +7 -0
  212. package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
  213. package/dist/cli/tui/extras-prompt.js +42 -0
  214. package/dist/cli/tui/extras-prompt.js.map +1 -0
  215. package/dist/cli/tui/flags-types.d.ts +19 -0
  216. package/dist/cli/tui/flags-types.d.ts.map +1 -0
  217. package/dist/cli/tui/flags-types.js +23 -0
  218. package/dist/cli/tui/flags-types.js.map +1 -0
  219. package/dist/cli/tui/flags.d.ts +5 -0
  220. package/dist/cli/tui/flags.d.ts.map +1 -0
  221. package/dist/cli/tui/flags.js +132 -0
  222. package/dist/cli/tui/flags.js.map +1 -0
  223. package/dist/cli/tui/format.d.ts +14 -0
  224. package/dist/cli/tui/format.d.ts.map +1 -0
  225. package/dist/cli/tui/format.js +37 -0
  226. package/dist/cli/tui/format.js.map +1 -0
  227. package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
  228. package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
  229. package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
  230. package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
  231. package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
  232. package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
  233. package/dist/cli/tui/hooks/useInstall.js +90 -0
  234. package/dist/cli/tui/hooks/useInstall.js.map +1 -0
  235. package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
  236. package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
  237. package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
  238. package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
  239. package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
  240. package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
  241. package/dist/cli/tui/hooks/useVerify.js +71 -0
  242. package/dist/cli/tui/hooks/useVerify.js.map +1 -0
  243. package/dist/cli/tui/ink-init.d.ts +2 -0
  244. package/dist/cli/tui/ink-init.d.ts.map +1 -0
  245. package/dist/cli/tui/ink-init.js +198 -0
  246. package/dist/cli/tui/ink-init.js.map +1 -0
  247. package/dist/cli/tui/reporter-auto.d.ts +7 -0
  248. package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
  249. package/dist/cli/tui/reporter-auto.js +15 -0
  250. package/dist/cli/tui/reporter-auto.js.map +1 -0
  251. package/dist/cli/tui/reporter.d.ts +26 -0
  252. package/dist/cli/tui/reporter.d.ts.map +1 -0
  253. package/dist/cli/tui/reporter.js +32 -0
  254. package/dist/cli/tui/reporter.js.map +1 -0
  255. package/dist/cli/tui/run-command.d.ts +14 -0
  256. package/dist/cli/tui/run-command.d.ts.map +1 -0
  257. package/dist/cli/tui/run-command.js +72 -0
  258. package/dist/cli/tui/run-command.js.map +1 -0
  259. package/dist/cli/tui/select-agents.d.ts +6 -0
  260. package/dist/cli/tui/select-agents.d.ts.map +1 -0
  261. package/dist/cli/tui/select-agents.js +32 -0
  262. package/dist/cli/tui/select-agents.js.map +1 -0
  263. package/dist/cli/tui/status-agents.d.ts +11 -0
  264. package/dist/cli/tui/status-agents.d.ts.map +1 -0
  265. package/dist/cli/tui/status-agents.js +53 -0
  266. package/dist/cli/tui/status-agents.js.map +1 -0
  267. package/dist/cli/tui/status-cache.d.ts +6 -0
  268. package/dist/cli/tui/status-cache.d.ts.map +1 -0
  269. package/dist/cli/tui/status-cache.js +39 -0
  270. package/dist/cli/tui/status-cache.js.map +1 -0
  271. package/dist/cli/tui/status-format.d.ts +14 -0
  272. package/dist/cli/tui/status-format.d.ts.map +1 -0
  273. package/dist/cli/tui/status-format.js +41 -0
  274. package/dist/cli/tui/status-format.js.map +1 -0
  275. package/dist/cli/tui/status-python.d.ts +6 -0
  276. package/dist/cli/tui/status-python.d.ts.map +1 -0
  277. package/dist/cli/tui/status-python.js +30 -0
  278. package/dist/cli/tui/status-python.js.map +1 -0
  279. package/dist/cli/tui/system-check.d.ts +24 -0
  280. package/dist/cli/tui/system-check.d.ts.map +1 -0
  281. package/dist/cli/tui/system-check.js +103 -0
  282. package/dist/cli/tui/system-check.js.map +1 -0
  283. package/dist/cli/tui/tui-reporter.d.ts +19 -0
  284. package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
  285. package/dist/cli/tui/tui-reporter.js +95 -0
  286. package/dist/cli/tui/tui-reporter.js.map +1 -0
  287. package/dist/cli/tui/utils/config-writer.d.ts +3 -0
  288. package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
  289. package/dist/cli/tui/utils/config-writer.js +22 -0
  290. package/dist/cli/tui/utils/config-writer.js.map +1 -0
  291. package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
  292. package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
  293. package/dist/cli/tui/utils/suppress-logs.js +11 -0
  294. package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
  295. package/dist/cli/tui/verify-suggestions.d.ts +5 -0
  296. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
  297. package/dist/cli/tui/verify-suggestions.js +20 -0
  298. package/dist/cli/tui/verify-suggestions.js.map +1 -0
  299. package/dist/cli/tui/verify.d.ts +14 -0
  300. package/dist/cli/tui/verify.d.ts.map +1 -0
  301. package/dist/cli/tui/verify.js +101 -0
  302. package/dist/cli/tui/verify.js.map +1 -0
  303. package/dist/cli/tui/version.d.ts +2 -0
  304. package/dist/cli/tui/version.d.ts.map +1 -0
  305. package/dist/cli/tui/version.js +14 -0
  306. package/dist/cli/tui/version.js.map +1 -0
  307. package/dist/cli/uninstall.d.ts +2 -0
  308. package/dist/cli/uninstall.d.ts.map +1 -0
  309. package/dist/cli/uninstall.js +57 -0
  310. package/dist/cli/uninstall.js.map +1 -0
  311. package/dist/cli/warmup.d.ts +10 -2
  312. package/dist/cli/warmup.d.ts.map +1 -1
  313. package/dist/cli/warmup.js +226 -93
  314. package/dist/cli/warmup.js.map +1 -1
  315. package/dist/config.d.ts +28 -2
  316. package/dist/config.d.ts.map +1 -1
  317. package/dist/config.js +106 -56
  318. package/dist/config.js.map +1 -1
  319. package/dist/crawl/crawler.d.ts +6 -0
  320. package/dist/crawl/crawler.d.ts.map +1 -1
  321. package/dist/crawl/crawler.js +210 -209
  322. package/dist/crawl/crawler.js.map +1 -1
  323. package/dist/crawl/dedup.d.ts +1 -0
  324. package/dist/crawl/dedup.d.ts.map +1 -1
  325. package/dist/crawl/dedup.js +124 -81
  326. package/dist/crawl/dedup.js.map +1 -1
  327. package/dist/crawl/etag-incremental.d.ts +43 -0
  328. package/dist/crawl/etag-incremental.d.ts.map +1 -0
  329. package/dist/crawl/etag-incremental.js +94 -0
  330. package/dist/crawl/etag-incremental.js.map +1 -0
  331. package/dist/crawl/index-to-vec.d.ts +10 -0
  332. package/dist/crawl/index-to-vec.d.ts.map +1 -0
  333. package/dist/crawl/index-to-vec.js +44 -0
  334. package/dist/crawl/index-to-vec.js.map +1 -0
  335. package/dist/crawl/mapper.js +136 -164
  336. package/dist/crawl/mapper.js.map +1 -1
  337. package/dist/crawl/rate-limiter.js +63 -66
  338. package/dist/crawl/rate-limiter.js.map +1 -1
  339. package/dist/crawl/robots.js +58 -57
  340. package/dist/crawl/robots.js.map +1 -1
  341. package/dist/crawl/sitemap-first.d.ts +12 -0
  342. package/dist/crawl/sitemap-first.d.ts.map +1 -0
  343. package/dist/crawl/sitemap-first.js +47 -0
  344. package/dist/crawl/sitemap-first.js.map +1 -0
  345. package/dist/crawl/sitemap.js +33 -32
  346. package/dist/crawl/sitemap.js.map +1 -1
  347. package/dist/crawl/url-utils.d.ts +1 -0
  348. package/dist/crawl/url-utils.d.ts.map +1 -1
  349. package/dist/crawl/url-utils.js +49 -37
  350. package/dist/crawl/url-utils.js.map +1 -1
  351. package/dist/daemon/health-check.d.ts +16 -0
  352. package/dist/daemon/health-check.d.ts.map +1 -0
  353. package/dist/daemon/health-check.js +33 -0
  354. package/dist/daemon/health-check.js.map +1 -0
  355. package/dist/daemon/http-server.d.ts +26 -0
  356. package/dist/daemon/http-server.d.ts.map +1 -0
  357. package/dist/daemon/http-server.js +275 -0
  358. package/dist/daemon/http-server.js.map +1 -0
  359. package/dist/daemon/proxy.d.ts +10 -0
  360. package/dist/daemon/proxy.d.ts.map +1 -0
  361. package/dist/daemon/proxy.js +93 -0
  362. package/dist/daemon/proxy.js.map +1 -0
  363. package/dist/embedding/embed.d.ts +59 -0
  364. package/dist/embedding/embed.d.ts.map +1 -0
  365. package/dist/embedding/embed.js +233 -0
  366. package/dist/embedding/embed.js.map +1 -0
  367. package/dist/embedding/fastembed-provider.d.ts +19 -0
  368. package/dist/embedding/fastembed-provider.d.ts.map +1 -0
  369. package/dist/embedding/fastembed-provider.js +51 -0
  370. package/dist/embedding/fastembed-provider.js.map +1 -0
  371. package/dist/embedding/key-terms.d.ts +12 -0
  372. package/dist/embedding/key-terms.d.ts.map +1 -0
  373. package/dist/embedding/key-terms.js +234 -0
  374. package/dist/embedding/key-terms.js.map +1 -0
  375. package/dist/extraction/boilerplate.d.ts +15 -0
  376. package/dist/extraction/boilerplate.d.ts.map +1 -0
  377. package/dist/extraction/boilerplate.js +52 -0
  378. package/dist/extraction/boilerplate.js.map +1 -0
  379. package/dist/extraction/defuddle.d.ts.map +1 -1
  380. package/dist/extraction/defuddle.js +27 -23
  381. package/dist/extraction/defuddle.js.map +1 -1
  382. package/dist/extraction/extract.d.ts.map +1 -1
  383. package/dist/extraction/extract.js +76 -76
  384. package/dist/extraction/extract.js.map +1 -1
  385. package/dist/extraction/jsonld.js +50 -54
  386. package/dist/extraction/jsonld.js.map +1 -1
  387. package/dist/extraction/lang-hints.d.ts +2 -0
  388. package/dist/extraction/lang-hints.d.ts.map +1 -0
  389. package/dist/extraction/lang-hints.js +30 -0
  390. package/dist/extraction/lang-hints.js.map +1 -0
  391. package/dist/extraction/llm-fallback.d.ts +17 -0
  392. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  393. package/dist/extraction/llm-fallback.js +130 -0
  394. package/dist/extraction/llm-fallback.js.map +1 -0
  395. package/dist/extraction/markdown-sanitize.d.ts +2 -0
  396. package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
  397. package/dist/extraction/markdown-sanitize.js +151 -0
  398. package/dist/extraction/markdown-sanitize.js.map +1 -0
  399. package/dist/extraction/markdown.d.ts +11 -0
  400. package/dist/extraction/markdown.d.ts.map +1 -1
  401. package/dist/extraction/markdown.js +195 -91
  402. package/dist/extraction/markdown.js.map +1 -1
  403. package/dist/extraction/pipeline.d.ts +8 -0
  404. package/dist/extraction/pipeline.d.ts.map +1 -1
  405. package/dist/extraction/pipeline.js +57 -91
  406. package/dist/extraction/pipeline.js.map +1 -1
  407. package/dist/extraction/readability.d.ts +1 -1
  408. package/dist/extraction/readability.d.ts.map +1 -1
  409. package/dist/extraction/readability.js +28 -29
  410. package/dist/extraction/readability.js.map +1 -1
  411. package/dist/extraction/schema.d.ts +12 -0
  412. package/dist/extraction/schema.d.ts.map +1 -1
  413. package/dist/extraction/schema.js +135 -72
  414. package/dist/extraction/schema.js.map +1 -1
  415. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  416. package/dist/extraction/site-extractors/docs-generic.js +81 -91
  417. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  418. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  419. package/dist/extraction/site-extractors/github.js +87 -95
  420. package/dist/extraction/site-extractors/github.js.map +1 -1
  421. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  422. package/dist/extraction/site-extractors/mdn.js +46 -54
  423. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  424. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  425. package/dist/extraction/site-extractors/stackoverflow.js +71 -80
  426. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  427. package/dist/extraction/structured-data.d.ts +4 -0
  428. package/dist/extraction/structured-data.d.ts.map +1 -0
  429. package/dist/extraction/structured-data.js +173 -0
  430. package/dist/extraction/structured-data.js.map +1 -0
  431. package/dist/extraction/structured.d.ts +4 -0
  432. package/dist/extraction/structured.d.ts.map +1 -0
  433. package/dist/extraction/structured.js +163 -0
  434. package/dist/extraction/structured.js.map +1 -0
  435. package/dist/extraction/v1/classifier.d.ts +3 -0
  436. package/dist/extraction/v1/classifier.d.ts.map +1 -0
  437. package/dist/extraction/v1/classifier.js +110 -0
  438. package/dist/extraction/v1/classifier.js.map +1 -0
  439. package/dist/extraction/v1/extract-provider.d.ts +16 -0
  440. package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
  441. package/dist/extraction/v1/extract-provider.js +43 -0
  442. package/dist/extraction/v1/extract-provider.js.map +1 -0
  443. package/dist/extraction/v1/local-llm.d.ts +8 -0
  444. package/dist/extraction/v1/local-llm.d.ts.map +1 -0
  445. package/dist/extraction/v1/local-llm.js +34 -0
  446. package/dist/extraction/v1/local-llm.js.map +1 -0
  447. package/dist/extraction/v1/news.d.ts +3 -0
  448. package/dist/extraction/v1/news.d.ts.map +1 -0
  449. package/dist/extraction/v1/news.js +61 -0
  450. package/dist/extraction/v1/news.js.map +1 -0
  451. package/dist/extraction/v1/product.d.ts +3 -0
  452. package/dist/extraction/v1/product.d.ts.map +1 -0
  453. package/dist/extraction/v1/product.js +166 -0
  454. package/dist/extraction/v1/product.js.map +1 -0
  455. package/dist/extraction/v1/recipe.d.ts +3 -0
  456. package/dist/extraction/v1/recipe.d.ts.map +1 -0
  457. package/dist/extraction/v1/recipe.js +136 -0
  458. package/dist/extraction/v1/recipe.js.map +1 -0
  459. package/dist/extraction/v1/routed.d.ts +17 -0
  460. package/dist/extraction/v1/routed.d.ts.map +1 -0
  461. package/dist/extraction/v1/routed.js +68 -0
  462. package/dist/extraction/v1/routed.js.map +1 -0
  463. package/dist/extraction/v1/schemas/Article.d.ts +11 -0
  464. package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
  465. package/dist/extraction/v1/schemas/Article.js +23 -0
  466. package/dist/extraction/v1/schemas/Article.js.map +1 -0
  467. package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
  468. package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
  469. package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
  470. package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
  471. package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
  472. package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
  473. package/dist/extraction/v1/schemas/EventListing.js +122 -0
  474. package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
  475. package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
  476. package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
  477. package/dist/extraction/v1/schemas/Paper.js +156 -0
  478. package/dist/extraction/v1/schemas/Paper.js.map +1 -0
  479. package/dist/extraction/v1/schemas/Product.d.ts +17 -0
  480. package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
  481. package/dist/extraction/v1/schemas/Product.js +149 -0
  482. package/dist/extraction/v1/schemas/Product.js.map +1 -0
  483. package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
  484. package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
  485. package/dist/extraction/v1/schemas/Recipe.js +160 -0
  486. package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
  487. package/dist/extraction/v1/schemas/index.d.ts +13 -0
  488. package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
  489. package/dist/extraction/v1/schemas/index.js +44 -0
  490. package/dist/extraction/v1/schemas/index.js.map +1 -0
  491. package/dist/extraction/v1/site-extractors.d.ts +5 -0
  492. package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
  493. package/dist/extraction/v1/site-extractors.js +31 -0
  494. package/dist/extraction/v1/site-extractors.js.map +1 -0
  495. package/dist/fetch/action-executor.d.ts +28 -0
  496. package/dist/fetch/action-executor.d.ts.map +1 -0
  497. package/dist/fetch/action-executor.js +88 -0
  498. package/dist/fetch/action-executor.js.map +1 -0
  499. package/dist/fetch/auth.d.ts +2 -1
  500. package/dist/fetch/auth.d.ts.map +1 -1
  501. package/dist/fetch/auth.js +56 -26
  502. package/dist/fetch/auth.js.map +1 -1
  503. package/dist/fetch/browser-pool.d.ts +30 -11
  504. package/dist/fetch/browser-pool.d.ts.map +1 -1
  505. package/dist/fetch/browser-pool.js +303 -127
  506. package/dist/fetch/browser-pool.js.map +1 -1
  507. package/dist/fetch/browser-selector.d.ts +17 -0
  508. package/dist/fetch/browser-selector.d.ts.map +1 -0
  509. package/dist/fetch/browser-selector.js +72 -0
  510. package/dist/fetch/browser-selector.js.map +1 -0
  511. package/dist/fetch/browser-types.d.ts +3 -0
  512. package/dist/fetch/browser-types.d.ts.map +1 -0
  513. package/dist/fetch/browser-types.js +45 -0
  514. package/dist/fetch/browser-types.js.map +1 -0
  515. package/dist/fetch/cdp-client.d.ts +9 -0
  516. package/dist/fetch/cdp-client.d.ts.map +1 -0
  517. package/dist/fetch/cdp-client.js +89 -0
  518. package/dist/fetch/cdp-client.js.map +1 -0
  519. package/dist/fetch/content-check.js +39 -46
  520. package/dist/fetch/content-check.js.map +1 -1
  521. package/dist/fetch/error-describe.d.ts +7 -0
  522. package/dist/fetch/error-describe.d.ts.map +1 -0
  523. package/dist/fetch/error-describe.js +37 -0
  524. package/dist/fetch/error-describe.js.map +1 -0
  525. package/dist/fetch/http-client.d.ts +4 -0
  526. package/dist/fetch/http-client.d.ts.map +1 -1
  527. package/dist/fetch/http-client.js +147 -128
  528. package/dist/fetch/http-client.js.map +1 -1
  529. package/dist/fetch/lightpanda.d.ts +28 -0
  530. package/dist/fetch/lightpanda.d.ts.map +1 -0
  531. package/dist/fetch/lightpanda.js +174 -0
  532. package/dist/fetch/lightpanda.js.map +1 -0
  533. package/dist/fetch/playwright-tier.d.ts +19 -0
  534. package/dist/fetch/playwright-tier.d.ts.map +1 -0
  535. package/dist/fetch/playwright-tier.js +76 -0
  536. package/dist/fetch/playwright-tier.js.map +1 -0
  537. package/dist/fetch/router.d.ts +49 -3
  538. package/dist/fetch/router.d.ts.map +1 -1
  539. package/dist/fetch/router.js +187 -81
  540. package/dist/fetch/router.js.map +1 -1
  541. package/dist/index.js +102 -17
  542. package/dist/index.js.map +1 -1
  543. package/dist/instructions.d.ts +31 -0
  544. package/dist/instructions.d.ts.map +1 -0
  545. package/dist/instructions.js +245 -0
  546. package/dist/instructions.js.map +1 -0
  547. package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
  548. package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
  549. package/dist/integrations/cloud/llm/anthropic.js +41 -0
  550. package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
  551. package/dist/integrations/cloud/llm/cache.d.ts +5 -0
  552. package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
  553. package/dist/integrations/cloud/llm/cache.js +49 -0
  554. package/dist/integrations/cloud/llm/cache.js.map +1 -0
  555. package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
  556. package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
  557. package/dist/integrations/cloud/llm/gemini.js +37 -0
  558. package/dist/integrations/cloud/llm/gemini.js.map +1 -0
  559. package/dist/integrations/cloud/llm/groq.d.ts +3 -0
  560. package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
  561. package/dist/integrations/cloud/llm/groq.js +74 -0
  562. package/dist/integrations/cloud/llm/groq.js.map +1 -0
  563. package/dist/integrations/cloud/llm/hash.d.ts +3 -0
  564. package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
  565. package/dist/integrations/cloud/llm/hash.js +26 -0
  566. package/dist/integrations/cloud/llm/hash.js.map +1 -0
  567. package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
  568. package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
  569. package/dist/integrations/cloud/llm/model-select.js +32 -0
  570. package/dist/integrations/cloud/llm/model-select.js.map +1 -0
  571. package/dist/integrations/cloud/llm/openai.d.ts +3 -0
  572. package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
  573. package/dist/integrations/cloud/llm/openai.js +43 -0
  574. package/dist/integrations/cloud/llm/openai.js.map +1 -0
  575. package/dist/integrations/cloud/llm/run.d.ts +27 -0
  576. package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
  577. package/dist/integrations/cloud/llm/run.js +99 -0
  578. package/dist/integrations/cloud/llm/run.js.map +1 -0
  579. package/dist/integrations/cloud/llm/select.d.ts +5 -0
  580. package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
  581. package/dist/integrations/cloud/llm/select.js +30 -0
  582. package/dist/integrations/cloud/llm/select.js.map +1 -0
  583. package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
  584. package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
  585. package/dist/integrations/cloud/llm/text-adapters.js +103 -0
  586. package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
  587. package/dist/integrations/cloud/llm/types.d.ts +24 -0
  588. package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
  589. package/dist/integrations/cloud/llm/types.js +1 -0
  590. package/dist/integrations/cloud/llm/types.js.map +1 -0
  591. package/dist/integrations/cloud/llm/validate.d.ts +6 -0
  592. package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
  593. package/dist/integrations/cloud/llm/validate.js +63 -0
  594. package/dist/integrations/cloud/llm/validate.js.map +1 -0
  595. package/dist/logger.d.ts +4 -1
  596. package/dist/logger.d.ts.map +1 -1
  597. package/dist/logger.js +71 -30
  598. package/dist/logger.js.map +1 -1
  599. package/dist/pdf-parse.d.js +1 -0
  600. package/dist/pdf-parse.d.js.map +1 -0
  601. package/dist/plugins/loader.d.ts +20 -0
  602. package/dist/plugins/loader.d.ts.map +1 -0
  603. package/dist/plugins/loader.js +157 -0
  604. package/dist/plugins/loader.js.map +1 -0
  605. package/dist/plugins/registry.d.ts +26 -0
  606. package/dist/plugins/registry.d.ts.map +1 -0
  607. package/dist/plugins/registry.js +71 -0
  608. package/dist/plugins/registry.js.map +1 -0
  609. package/dist/plugins/validate.d.ts +9 -0
  610. package/dist/plugins/validate.d.ts.map +1 -0
  611. package/dist/plugins/validate.js +79 -0
  612. package/dist/plugins/validate.js.map +1 -0
  613. package/dist/providers/embed-provider.d.ts +11 -0
  614. package/dist/providers/embed-provider.d.ts.map +1 -0
  615. package/dist/providers/embed-provider.js +24 -0
  616. package/dist/providers/embed-provider.js.map +1 -0
  617. package/dist/providers/extract-provider.d.ts +23 -0
  618. package/dist/providers/extract-provider.d.ts.map +1 -0
  619. package/dist/providers/extract-provider.js +25 -0
  620. package/dist/providers/extract-provider.js.map +1 -0
  621. package/dist/providers/rerank-provider.d.ts +17 -0
  622. package/dist/providers/rerank-provider.d.ts.map +1 -0
  623. package/dist/providers/rerank-provider.js +41 -0
  624. package/dist/providers/rerank-provider.js.map +1 -0
  625. package/dist/providers/search-provider.d.ts +25 -0
  626. package/dist/providers/search-provider.d.ts.map +1 -0
  627. package/dist/providers/search-provider.js +44 -0
  628. package/dist/providers/search-provider.js.map +1 -0
  629. package/dist/providers/vector-store.d.ts +27 -0
  630. package/dist/providers/vector-store.d.ts.map +1 -0
  631. package/dist/providers/vector-store.js +27 -0
  632. package/dist/providers/vector-store.js.map +1 -0
  633. package/dist/python-env.d.ts +9 -0
  634. package/dist/python-env.d.ts.map +1 -0
  635. package/dist/python-env.js +13 -0
  636. package/dist/python-env.js.map +1 -0
  637. package/dist/repl/commands/agent.d.ts +5 -0
  638. package/dist/repl/commands/agent.d.ts.map +1 -0
  639. package/dist/repl/commands/agent.js +62 -0
  640. package/dist/repl/commands/agent.js.map +1 -0
  641. package/dist/repl/commands/cache.d.ts +4 -0
  642. package/dist/repl/commands/cache.d.ts.map +1 -0
  643. package/dist/repl/commands/cache.js +43 -0
  644. package/dist/repl/commands/cache.js.map +1 -0
  645. package/dist/repl/commands/crawl.d.ts +7 -0
  646. package/dist/repl/commands/crawl.d.ts.map +1 -0
  647. package/dist/repl/commands/crawl.js +44 -0
  648. package/dist/repl/commands/crawl.js.map +1 -0
  649. package/dist/repl/commands/extract.d.ts +5 -0
  650. package/dist/repl/commands/extract.d.ts.map +1 -0
  651. package/dist/repl/commands/extract.js +47 -0
  652. package/dist/repl/commands/extract.js.map +1 -0
  653. package/dist/repl/commands/fetch.d.ts +5 -0
  654. package/dist/repl/commands/fetch.d.ts.map +1 -0
  655. package/dist/repl/commands/fetch.js +67 -0
  656. package/dist/repl/commands/fetch.js.map +1 -0
  657. package/dist/repl/commands/find-similar.d.ts +5 -0
  658. package/dist/repl/commands/find-similar.d.ts.map +1 -0
  659. package/dist/repl/commands/find-similar.js +74 -0
  660. package/dist/repl/commands/find-similar.js.map +1 -0
  661. package/dist/repl/commands/research.d.ts +5 -0
  662. package/dist/repl/commands/research.d.ts.map +1 -0
  663. package/dist/repl/commands/research.js +65 -0
  664. package/dist/repl/commands/research.js.map +1 -0
  665. package/dist/repl/commands/search.d.ts +5 -0
  666. package/dist/repl/commands/search.d.ts.map +1 -0
  667. package/dist/repl/commands/search.js +74 -0
  668. package/dist/repl/commands/search.js.map +1 -0
  669. package/dist/repl/commands/types.d.ts +9 -0
  670. package/dist/repl/commands/types.d.ts.map +1 -0
  671. package/dist/repl/commands/types.js +1 -0
  672. package/dist/repl/commands/types.js.map +1 -0
  673. package/dist/repl/formatters.d.ts +13 -0
  674. package/dist/repl/formatters.d.ts.map +1 -0
  675. package/dist/repl/formatters.js +283 -0
  676. package/dist/repl/formatters.js.map +1 -0
  677. package/dist/repl/parser.d.ts +9 -0
  678. package/dist/repl/parser.d.ts.map +1 -0
  679. package/dist/repl/parser.js +86 -0
  680. package/dist/repl/parser.js.map +1 -0
  681. package/dist/repl/shell.d.ts +8 -0
  682. package/dist/repl/shell.d.ts.map +1 -0
  683. package/dist/repl/shell.js +184 -0
  684. package/dist/repl/shell.js.map +1 -0
  685. package/dist/research/branch-exploration.d.ts +14 -0
  686. package/dist/research/branch-exploration.d.ts.map +1 -0
  687. package/dist/research/branch-exploration.js +100 -0
  688. package/dist/research/branch-exploration.js.map +1 -0
  689. package/dist/research/brief.d.ts +6 -0
  690. package/dist/research/brief.d.ts.map +1 -0
  691. package/dist/research/brief.js +246 -0
  692. package/dist/research/brief.js.map +1 -0
  693. package/dist/research/citation-graph.d.ts +9 -0
  694. package/dist/research/citation-graph.d.ts.map +1 -0
  695. package/dist/research/citation-graph.js +114 -0
  696. package/dist/research/citation-graph.js.map +1 -0
  697. package/dist/research/decompose.d.ts +14 -0
  698. package/dist/research/decompose.d.ts.map +1 -0
  699. package/dist/research/decompose.js +439 -0
  700. package/dist/research/decompose.js.map +1 -0
  701. package/dist/research/pipeline.d.ts +5 -0
  702. package/dist/research/pipeline.d.ts.map +1 -0
  703. package/dist/research/pipeline.js +269 -0
  704. package/dist/research/pipeline.js.map +1 -0
  705. package/dist/research/synthesis-local.d.ts +19 -0
  706. package/dist/research/synthesis-local.d.ts.map +1 -0
  707. package/dist/research/synthesis-local.js +62 -0
  708. package/dist/research/synthesis-local.js.map +1 -0
  709. package/dist/research/synthesize.d.ts +10 -0
  710. package/dist/research/synthesize.d.ts.map +1 -0
  711. package/dist/research/synthesize.js +137 -0
  712. package/dist/research/synthesize.js.map +1 -0
  713. package/dist/search/answer-synthesis.d.ts +33 -0
  714. package/dist/search/answer-synthesis.d.ts.map +1 -0
  715. package/dist/search/answer-synthesis.js +244 -0
  716. package/dist/search/answer-synthesis.js.map +1 -0
  717. package/dist/search/context-formatter.d.ts +3 -0
  718. package/dist/search/context-formatter.d.ts.map +1 -0
  719. package/dist/search/context-formatter.js +56 -0
  720. package/dist/search/context-formatter.js.map +1 -0
  721. package/dist/search/dedup.d.ts +1 -0
  722. package/dist/search/dedup.d.ts.map +1 -1
  723. package/dist/search/dedup.js +40 -32
  724. package/dist/search/dedup.js.map +1 -1
  725. package/dist/search/engines/arxiv.d.ts +7 -0
  726. package/dist/search/engines/arxiv.d.ts.map +1 -0
  727. package/dist/search/engines/arxiv.js +70 -0
  728. package/dist/search/engines/arxiv.js.map +1 -0
  729. package/dist/search/engines/bing-news.d.ts +7 -0
  730. package/dist/search/engines/bing-news.d.ts.map +1 -0
  731. package/dist/search/engines/bing-news.js +97 -0
  732. package/dist/search/engines/bing-news.js.map +1 -0
  733. package/dist/search/engines/bing.d.ts +1 -0
  734. package/dist/search/engines/bing.d.ts.map +1 -1
  735. package/dist/search/engines/bing.js +100 -44
  736. package/dist/search/engines/bing.js.map +1 -1
  737. package/dist/search/engines/devdocs.d.ts +6 -0
  738. package/dist/search/engines/devdocs.d.ts.map +1 -0
  739. package/dist/search/engines/devdocs.js +56 -0
  740. package/dist/search/engines/devdocs.js.map +1 -0
  741. package/dist/search/engines/duckduckgo.d.ts.map +1 -1
  742. package/dist/search/engines/duckduckgo.js +56 -44
  743. package/dist/search/engines/duckduckgo.js.map +1 -1
  744. package/dist/search/engines/github-code.d.ts +7 -0
  745. package/dist/search/engines/github-code.d.ts.map +1 -0
  746. package/dist/search/engines/github-code.js +55 -0
  747. package/dist/search/engines/github-code.js.map +1 -0
  748. package/dist/search/engines/hn-algolia.d.ts +7 -0
  749. package/dist/search/engines/hn-algolia.d.ts.map +1 -0
  750. package/dist/search/engines/hn-algolia.js +76 -0
  751. package/dist/search/engines/hn-algolia.js.map +1 -0
  752. package/dist/search/engines/lobsters.d.ts +7 -0
  753. package/dist/search/engines/lobsters.d.ts.map +1 -0
  754. package/dist/search/engines/lobsters.js +83 -0
  755. package/dist/search/engines/lobsters.js.map +1 -0
  756. package/dist/search/engines/mdn.d.ts +7 -0
  757. package/dist/search/engines/mdn.d.ts.map +1 -0
  758. package/dist/search/engines/mdn.js +48 -0
  759. package/dist/search/engines/mdn.js.map +1 -0
  760. package/dist/search/engines/semantic-scholar.d.ts +7 -0
  761. package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
  762. package/dist/search/engines/semantic-scholar.js +69 -0
  763. package/dist/search/engines/semantic-scholar.js.map +1 -0
  764. package/dist/search/engines/stackoverflow.d.ts +7 -0
  765. package/dist/search/engines/stackoverflow.d.ts.map +1 -0
  766. package/dist/search/engines/stackoverflow.js +73 -0
  767. package/dist/search/engines/stackoverflow.js.map +1 -0
  768. package/dist/search/engines/startpage.d.ts.map +1 -1
  769. package/dist/search/engines/startpage.js +65 -46
  770. package/dist/search/engines/startpage.js.map +1 -1
  771. package/dist/search/evidence.d.ts +25 -0
  772. package/dist/search/evidence.d.ts.map +1 -0
  773. package/dist/search/evidence.js +220 -0
  774. package/dist/search/evidence.js.map +1 -0
  775. package/dist/search/filters.d.ts.map +1 -1
  776. package/dist/search/filters.js +58 -54
  777. package/dist/search/filters.js.map +1 -1
  778. package/dist/search/find-similar/crawl-rank.d.ts +9 -0
  779. package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
  780. package/dist/search/find-similar/crawl-rank.js +272 -0
  781. package/dist/search/find-similar/crawl-rank.js.map +1 -0
  782. package/dist/search/find-similar/mode.d.ts +4 -0
  783. package/dist/search/find-similar/mode.d.ts.map +1 -0
  784. package/dist/search/find-similar/mode.js +12 -0
  785. package/dist/search/find-similar/mode.js.map +1 -0
  786. package/dist/search/find-similar.d.ts +5 -0
  787. package/dist/search/find-similar.d.ts.map +1 -0
  788. package/dist/search/find-similar.js +509 -0
  789. package/dist/search/find-similar.js.map +1 -0
  790. package/dist/search/highlights.d.ts +19 -0
  791. package/dist/search/highlights.d.ts.map +1 -0
  792. package/dist/search/highlights.js +167 -0
  793. package/dist/search/highlights.js.map +1 -0
  794. package/dist/search/language-filter.d.ts +29 -0
  795. package/dist/search/language-filter.d.ts.map +1 -0
  796. package/dist/search/language-filter.js +126 -0
  797. package/dist/search/language-filter.js.map +1 -0
  798. package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
  799. package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
  800. package/dist/search/legacy/searxng-orchestrator.js +501 -0
  801. package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
  802. package/dist/search/legacy/searxng-provider.d.ts +7 -0
  803. package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
  804. package/dist/search/legacy/searxng-provider.js +11 -0
  805. package/dist/search/legacy/searxng-provider.js.map +1 -0
  806. package/dist/search/multi-query.d.ts +25 -0
  807. package/dist/search/multi-query.d.ts.map +1 -0
  808. package/dist/search/multi-query.js +228 -0
  809. package/dist/search/multi-query.js.map +1 -0
  810. package/dist/search/query.js +32 -34
  811. package/dist/search/query.js.map +1 -1
  812. package/dist/search/rerank.d.ts +3 -1
  813. package/dist/search/rerank.d.ts.map +1 -1
  814. package/dist/search/rerank.js +44 -35
  815. package/dist/search/rerank.js.map +1 -1
  816. package/dist/search/reranker/authority-boost.d.ts +3 -0
  817. package/dist/search/reranker/authority-boost.d.ts.map +1 -0
  818. package/dist/search/reranker/authority-boost.js +179 -0
  819. package/dist/search/reranker/authority-boost.js.map +1 -0
  820. package/dist/search/reranker/consensus-boost.d.ts +3 -0
  821. package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
  822. package/dist/search/reranker/consensus-boost.js +27 -0
  823. package/dist/search/reranker/consensus-boost.js.map +1 -0
  824. package/dist/search/reranker/recency-boost.d.ts +3 -0
  825. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  826. package/dist/search/reranker/recency-boost.js +13 -0
  827. package/dist/search/reranker/recency-boost.js.map +1 -0
  828. package/dist/search/reranker/recency.d.ts +3 -0
  829. package/dist/search/reranker/recency.d.ts.map +1 -0
  830. package/dist/search/reranker/recency.js +23 -0
  831. package/dist/search/reranker/recency.js.map +1 -0
  832. package/dist/search/reranker/transformers-rerank-provider.d.ts +13 -0
  833. package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
  834. package/dist/search/reranker/transformers-rerank-provider.js +94 -0
  835. package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
  836. package/dist/search/rrf.d.ts +17 -0
  837. package/dist/search/rrf.d.ts.map +1 -0
  838. package/dist/search/rrf.js +39 -0
  839. package/dist/search/rrf.js.map +1 -0
  840. package/dist/search/sampling.d.ts +25 -0
  841. package/dist/search/sampling.d.ts.map +1 -0
  842. package/dist/search/sampling.js +52 -0
  843. package/dist/search/sampling.js.map +1 -0
  844. package/dist/search/searxng.d.ts.map +1 -1
  845. package/dist/search/searxng.js +69 -79
  846. package/dist/search/searxng.js.map +1 -1
  847. package/dist/search/tokens.d.ts +3 -0
  848. package/dist/search/tokens.d.ts.map +1 -0
  849. package/dist/search/tokens.js +39 -0
  850. package/dist/search/tokens.js.map +1 -0
  851. package/dist/search/truncate.d.ts +6 -0
  852. package/dist/search/truncate.d.ts.map +1 -0
  853. package/dist/search/truncate.js +26 -0
  854. package/dist/search/truncate.js.map +1 -0
  855. package/dist/search/url-unwrap.d.ts +3 -0
  856. package/dist/search/url-unwrap.d.ts.map +1 -0
  857. package/dist/search/url-unwrap.js +43 -0
  858. package/dist/search/url-unwrap.js.map +1 -0
  859. package/dist/search/v1/context-rank.d.ts +13 -0
  860. package/dist/search/v1/context-rank.d.ts.map +1 -0
  861. package/dist/search/v1/context-rank.js +74 -0
  862. package/dist/search/v1/context-rank.js.map +1 -0
  863. package/dist/search/v1/engine-base.d.ts +27 -0
  864. package/dist/search/v1/engine-base.d.ts.map +1 -0
  865. package/dist/search/v1/engine-base.js +110 -0
  866. package/dist/search/v1/engine-base.js.map +1 -0
  867. package/dist/search/v1/intent-router.d.ts +22 -0
  868. package/dist/search/v1/intent-router.d.ts.map +1 -0
  869. package/dist/search/v1/intent-router.js +138 -0
  870. package/dist/search/v1/intent-router.js.map +1 -0
  871. package/dist/search/v1/orchestrator.d.ts +24 -0
  872. package/dist/search/v1/orchestrator.d.ts.map +1 -0
  873. package/dist/search/v1/orchestrator.js +163 -0
  874. package/dist/search/v1/orchestrator.js.map +1 -0
  875. package/dist/search/v1/recency-boost.d.ts +9 -0
  876. package/dist/search/v1/recency-boost.d.ts.map +1 -0
  877. package/dist/search/v1/recency-boost.js +37 -0
  878. package/dist/search/v1/recency-boost.js.map +1 -0
  879. package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
  880. package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
  881. package/dist/search/v1/recent-cache-dedup.js +85 -0
  882. package/dist/search/v1/recent-cache-dedup.js.map +1 -0
  883. package/dist/search/v1/rss/feed-config.d.ts +21 -0
  884. package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
  885. package/dist/search/v1/rss/feed-config.js +90 -0
  886. package/dist/search/v1/rss/feed-config.js.map +1 -0
  887. package/dist/search/v1/rss/feed-parser.d.ts +14 -0
  888. package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
  889. package/dist/search/v1/rss/feed-parser.js +104 -0
  890. package/dist/search/v1/rss/feed-parser.js.map +1 -0
  891. package/dist/search/v1/rss/feed-poller.d.ts +22 -0
  892. package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
  893. package/dist/search/v1/rss/feed-poller.js +102 -0
  894. package/dist/search/v1/rss/feed-poller.js.map +1 -0
  895. package/dist/search/v1/rss/feed-store.d.ts +30 -0
  896. package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
  897. package/dist/search/v1/rss/feed-store.js +134 -0
  898. package/dist/search/v1/rss/feed-store.js.map +1 -0
  899. package/dist/search/v1/rss/rss-engine.d.ts +6 -0
  900. package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
  901. package/dist/search/v1/rss/rss-engine.js +28 -0
  902. package/dist/search/v1/rss/rss-engine.js.map +1 -0
  903. package/dist/search/v1/v1-provider.d.ts +7 -0
  904. package/dist/search/v1/v1-provider.d.ts.map +1 -0
  905. package/dist/search/v1/v1-provider.js +68 -0
  906. package/dist/search/v1/v1-provider.js.map +1 -0
  907. package/dist/search/v1/verticals/code.d.ts +4 -0
  908. package/dist/search/v1/verticals/code.d.ts.map +1 -0
  909. package/dist/search/v1/verticals/code.js +20 -0
  910. package/dist/search/v1/verticals/code.js.map +1 -0
  911. package/dist/search/v1/verticals/docs.d.ts +4 -0
  912. package/dist/search/v1/verticals/docs.d.ts.map +1 -0
  913. package/dist/search/v1/verticals/docs.js +20 -0
  914. package/dist/search/v1/verticals/docs.js.map +1 -0
  915. package/dist/search/v1/verticals/general.d.ts +4 -0
  916. package/dist/search/v1/verticals/general.d.ts.map +1 -0
  917. package/dist/search/v1/verticals/general.js +22 -0
  918. package/dist/search/v1/verticals/general.js.map +1 -0
  919. package/dist/search/v1/verticals/news.d.ts +10 -0
  920. package/dist/search/v1/verticals/news.d.ts.map +1 -0
  921. package/dist/search/v1/verticals/news.js +52 -0
  922. package/dist/search/v1/verticals/news.js.map +1 -0
  923. package/dist/search/v1/verticals/papers.d.ts +4 -0
  924. package/dist/search/v1/verticals/papers.d.ts.map +1 -0
  925. package/dist/search/v1/verticals/papers.js +23 -0
  926. package/dist/search/v1/verticals/papers.js.map +1 -0
  927. package/dist/search/validator.js +31 -31
  928. package/dist/search/validator.js.map +1 -1
  929. package/dist/searxng/bootstrap.d.ts +30 -0
  930. package/dist/searxng/bootstrap.d.ts.map +1 -1
  931. package/dist/searxng/bootstrap.js +223 -85
  932. package/dist/searxng/bootstrap.js.map +1 -1
  933. package/dist/searxng/docker.d.ts.map +1 -1
  934. package/dist/searxng/docker.js +69 -60
  935. package/dist/searxng/docker.js.map +1 -1
  936. package/dist/searxng/process.d.ts +13 -1
  937. package/dist/searxng/process.d.ts.map +1 -1
  938. package/dist/searxng/process.js +231 -164
  939. package/dist/searxng/process.js.map +1 -1
  940. package/dist/server/backend-status.d.ts +13 -0
  941. package/dist/server/backend-status.d.ts.map +1 -0
  942. package/dist/server/backend-status.js +40 -0
  943. package/dist/server/backend-status.js.map +1 -0
  944. package/dist/server/tool-schemas.d.ts +549 -0
  945. package/dist/server/tool-schemas.d.ts.map +1 -0
  946. package/dist/server/tool-schemas.js +464 -0
  947. package/dist/server/tool-schemas.js.map +1 -0
  948. package/dist/server/warmup-on-start.d.ts +9 -0
  949. package/dist/server/warmup-on-start.d.ts.map +1 -0
  950. package/dist/server/warmup-on-start.js +55 -0
  951. package/dist/server/warmup-on-start.js.map +1 -0
  952. package/dist/server.d.ts +17 -0
  953. package/dist/server.d.ts.map +1 -1
  954. package/dist/server.js +454 -297
  955. package/dist/server.js.map +1 -1
  956. package/dist/tools/agent.d.ts +5 -0
  957. package/dist/tools/agent.d.ts.map +1 -0
  958. package/dist/tools/agent.js +128 -0
  959. package/dist/tools/agent.js.map +1 -0
  960. package/dist/tools/cache.d.ts +2 -1
  961. package/dist/tools/cache.d.ts.map +1 -1
  962. package/dist/tools/cache.js +177 -44
  963. package/dist/tools/cache.js.map +1 -1
  964. package/dist/tools/crawl.d.ts.map +1 -1
  965. package/dist/tools/crawl.js +171 -88
  966. package/dist/tools/crawl.js.map +1 -1
  967. package/dist/tools/extract.d.ts +2 -2
  968. package/dist/tools/extract.d.ts.map +1 -1
  969. package/dist/tools/extract.js +175 -59
  970. package/dist/tools/extract.js.map +1 -1
  971. package/dist/tools/fetch.d.ts +2 -2
  972. package/dist/tools/fetch.d.ts.map +1 -1
  973. package/dist/tools/fetch.js +174 -68
  974. package/dist/tools/fetch.js.map +1 -1
  975. package/dist/tools/find-similar.d.ts +5 -0
  976. package/dist/tools/find-similar.d.ts.map +1 -0
  977. package/dist/tools/find-similar.js +127 -0
  978. package/dist/tools/find-similar.js.map +1 -0
  979. package/dist/tools/research.d.ts +5 -0
  980. package/dist/tools/research.d.ts.map +1 -0
  981. package/dist/tools/research.js +107 -0
  982. package/dist/tools/research.js.map +1 -0
  983. package/dist/tools/search.d.ts +10 -2
  984. package/dist/tools/search.d.ts.map +1 -1
  985. package/dist/tools/search.js +13 -158
  986. package/dist/tools/search.js.map +1 -1
  987. package/dist/types.d.ts +350 -7
  988. package/dist/types.d.ts.map +1 -1
  989. package/dist/types.js +6 -1
  990. package/dist/types.js.map +1 -1
  991. package/dist/util/mode.d.ts +4 -0
  992. package/dist/util/mode.d.ts.map +1 -0
  993. package/dist/util/mode.js +34 -0
  994. package/dist/util/mode.js.map +1 -0
  995. package/package.json +78 -8
  996. package/dist/extraction/trafilatura.d.ts +0 -6
  997. package/dist/extraction/trafilatura.d.ts.map +0 -1
  998. package/dist/extraction/trafilatura.js +0 -105
  999. package/dist/extraction/trafilatura.js.map +0 -1
  1000. package/dist/search/flashrank.d.ts +0 -12
  1001. package/dist/search/flashrank.d.ts.map +0 -1
  1002. package/dist/search/flashrank.js +0 -63
  1003. package/dist/search/flashrank.js.map +0 -1
@@ -1,97 +1,180 @@
1
- import { Crawler } from '../crawl/crawler.js';
2
- import { deduplicatePages } from '../crawl/dedup.js';
3
- import { mapUrls } from '../crawl/mapper.js';
4
- import { handleFetch } from './fetch.js';
5
- import { createLogger } from '../logger.js';
6
- const log = createLogger('crawl');
7
- const DEFAULT_MAX_TOTAL_CHARS = 100000;
8
- export async function handleCrawl(input, router) {
9
- try {
10
- // Map strategy: lightweight URL-only discovery, skip full crawl pipeline
11
- if (input.strategy === 'map') {
12
- return handleMapStrategy(input, router);
13
- }
14
- const fetchFn = async (url) => handleFetch({ url, use_auth: input.use_auth }, router);
15
- const rawFetchFn = async (url) => router.fetch(url, { renderJs: 'never' });
16
- const crawler = new Crawler(fetchFn, rawFetchFn);
17
- const result = await crawler.crawl(input);
18
- // Deduplicate cross-page content (pass domain for SQLite boilerplate caching)
19
- const domain = new URL(input.url).hostname;
20
- const dedupedPages = deduplicatePages(result.pages.map((p) => ({ url: p.url, markdown: p.markdown })), domain);
21
- // Apply deduped markdown back to pages
22
- const pages = result.pages.map((page, i) => ({
23
- ...page,
24
- markdown: dedupedPages[i]?.markdown ?? page.markdown,
25
- }));
26
- // Enforce max_total_chars budget
27
- const maxTotalChars = input.max_total_chars ?? DEFAULT_MAX_TOTAL_CHARS;
28
- const budgetedPages = [];
29
- let charCount = 0;
30
- for (const page of pages) {
31
- if (charCount + page.markdown.length > maxTotalChars && budgetedPages.length > 0) {
32
- break;
33
- }
34
- budgetedPages.push(page);
35
- charCount += page.markdown.length;
36
- }
37
- log.info('Crawl complete', {
38
- url: input.url,
39
- crawled: result.crawled,
40
- returned: budgetedPages.length,
41
- totalChars: charCount,
42
- });
43
- return {
44
- pages: budgetedPages,
45
- total_found: result.total_found,
46
- crawled: result.crawled,
47
- ...(result.links ? { links: result.links } : {}),
48
- };
1
+ import { Crawler } from "../crawl/crawler.js";
2
+ import { deduplicatePages } from "../crawl/dedup.js";
3
+ import { mapUrls } from "../crawl/mapper.js";
4
+ import { handleFetch } from "./fetch.js";
5
+ import {
6
+ buildEvidenceFromMarkdown,
7
+ applyAggregateMarkdownBudget
8
+ } from "../search/evidence.js";
9
+ import { countTokens } from "../search/tokens.js";
10
+ import { createLogger } from "../logger.js";
11
+ const log = createLogger("crawl");
12
+ const DEFAULT_MAX_TOTAL_CHARS = 1e5;
13
+ const DEFAULT_MAX_TOKENS_OUT = 4e3;
14
+ async function handleCrawl(input, router) {
15
+ try {
16
+ if (input.strategy === "map") {
17
+ return handleMapStrategy(input, router);
49
18
  }
50
- catch (err) {
51
- log.error('Crawl failed', { url: input.url, error: String(err) });
19
+ const fetchFn = async (url) => {
20
+ const r = await handleFetch({ url, use_auth: input.use_auth, include_full_markdown: true }, router);
21
+ if (!r.ok) {
52
22
  return {
53
- pages: [],
54
- total_found: 0,
55
- crawled: 0,
56
- error: err instanceof Error ? err.message : String(err),
23
+ url,
24
+ title: "",
25
+ markdown: "",
26
+ metadata: {},
27
+ links: [],
28
+ images: [],
29
+ cached: false,
30
+ error: r.error_reason
57
31
  };
32
+ }
33
+ return r.data;
34
+ };
35
+ const rawFetchFn = async (url) => router.fetch(url, { renderJs: "never" });
36
+ const crawler = new Crawler(fetchFn, rawFetchFn);
37
+ const result = await crawler.crawl(input);
38
+ const domain = new URL(input.url).hostname;
39
+ const dedupedPages = deduplicatePages(
40
+ result.pages.map((p) => ({ url: p.url, markdown: p.markdown })),
41
+ domain
42
+ );
43
+ const pages = result.pages.map((page, i) => ({
44
+ ...page,
45
+ markdown: dedupedPages[i]?.markdown ?? page.markdown
46
+ }));
47
+ const maxTotalChars = input.max_total_chars ?? DEFAULT_MAX_TOTAL_CHARS;
48
+ const budgetedPages = [];
49
+ let charCount = 0;
50
+ for (const page of pages) {
51
+ if (charCount + page.markdown.length > maxTotalChars && budgetedPages.length > 0) {
52
+ break;
53
+ }
54
+ budgetedPages.push(page);
55
+ charCount += page.markdown.length;
58
56
  }
59
- }
60
- async function handleMapStrategy(input, router) {
61
- const httpFetchFn = async (url) => {
62
- const raw = await router.fetch(url, { renderJs: 'never' });
63
- return { html: raw.html, finalUrl: raw.finalUrl, statusCode: raw.statusCode };
57
+ const droppedOverBudget = result.crawled - budgetedPages.length;
58
+ log.info("Crawl complete", {
59
+ url: input.url,
60
+ crawled: result.crawled,
61
+ returned: budgetedPages.length,
62
+ totalChars: charCount,
63
+ droppedOverBudget
64
+ });
65
+ const out = {
66
+ pages: budgetedPages,
67
+ total_found: result.total_found,
68
+ crawled: budgetedPages.length,
69
+ ...droppedOverBudget > 0 ? { dropped_over_budget: droppedOverBudget } : {},
70
+ ...result.links ? { links: result.links } : {}
64
71
  };
65
- try {
66
- const mapResult = await mapUrls({
67
- url: input.url,
68
- max_depth: input.max_depth,
69
- max_pages: input.max_pages,
70
- include_patterns: input.include_patterns,
71
- exclude_patterns: input.exclude_patterns,
72
- }, httpFetchFn);
73
- log.info('Map complete', {
74
- url: input.url,
75
- total_found: mapResult.total_found,
76
- sitemap_found: mapResult.sitemap_found,
77
- });
78
- return {
79
- urls: mapResult.urls,
80
- total_found: mapResult.total_found,
81
- sitemap_found: mapResult.sitemap_found,
82
- crawled: 0,
83
- ...(mapResult.error ? { error: mapResult.error } : {}),
84
- };
72
+ await attachEvidence(out, input);
73
+ return out;
74
+ } catch (err) {
75
+ log.error("Crawl failed", { url: input.url, error: String(err) });
76
+ return {
77
+ pages: [],
78
+ total_found: 0,
79
+ crawled: 0,
80
+ error: err instanceof Error ? err.message : String(err)
81
+ };
82
+ }
83
+ }
84
+ function buildPageExcerpt(markdown, maxChars = 600) {
85
+ if (!markdown) return "";
86
+ const paragraphs = markdown.split(/\n\n+/).map((p) => p.trim()).filter(Boolean);
87
+ let out = "";
88
+ for (const p of paragraphs) {
89
+ if (out.length + p.length + 2 > maxChars) {
90
+ const remaining = maxChars - out.length;
91
+ if (remaining > 80) out += (out ? "\n\n" : "") + p.slice(0, remaining) + "\u2026";
92
+ break;
85
93
  }
86
- catch (err) {
87
- log.error('Map strategy failed', { url: input.url, error: String(err) });
88
- return {
89
- urls: [],
90
- total_found: 0,
91
- sitemap_found: false,
92
- crawled: 0,
93
- error: err instanceof Error ? err.message : String(err),
94
- };
94
+ out += (out ? "\n\n" : "") + p;
95
+ }
96
+ return out;
97
+ }
98
+ async function attachEvidence(out, input) {
99
+ if (out.pages.length === 0) return;
100
+ const includeFull = input.include_full_markdown ?? false;
101
+ const maxTokensOut = input.max_tokens_out ?? DEFAULT_MAX_TOKENS_OUT;
102
+ let used = 0;
103
+ for (const page of out.pages) {
104
+ if (!page.markdown) continue;
105
+ const remaining = maxTokensOut - used;
106
+ if (remaining <= 0) break;
107
+ const evs = await buildEvidenceFromMarkdown(
108
+ page.title || page.url,
109
+ page.title,
110
+ page.url,
111
+ page.markdown,
112
+ { maxItems: 1, maxTokensOut: remaining }
113
+ );
114
+ if (evs.length > 0) {
115
+ page.evidence = evs;
116
+ for (const ev of evs) used += countTokens(ev.excerpt);
117
+ }
118
+ }
119
+ if (!includeFull) {
120
+ for (const page of out.pages) {
121
+ if (!page.evidence || page.evidence.length === 0) {
122
+ const excerpt = buildPageExcerpt(page.markdown);
123
+ if (excerpt) page.excerpt = excerpt;
124
+ }
125
+ page.markdown = "";
95
126
  }
127
+ } else {
128
+ applyAggregateMarkdownBudget(
129
+ out.pages,
130
+ (p) => p.markdown ?? "",
131
+ (p, body) => {
132
+ p.markdown = body;
133
+ },
134
+ { maxTokensOut }
135
+ );
136
+ }
137
+ }
138
+ async function handleMapStrategy(input, router) {
139
+ const httpFetchFn = async (url) => {
140
+ const raw = await router.fetch(url, { renderJs: "never" });
141
+ return { html: raw.html, finalUrl: raw.finalUrl, statusCode: raw.statusCode };
142
+ };
143
+ try {
144
+ const mapResult = await mapUrls(
145
+ {
146
+ url: input.url,
147
+ max_depth: input.max_depth,
148
+ max_pages: input.max_pages,
149
+ include_patterns: input.include_patterns,
150
+ exclude_patterns: input.exclude_patterns
151
+ },
152
+ httpFetchFn
153
+ );
154
+ log.info("Map complete", {
155
+ url: input.url,
156
+ total_found: mapResult.total_found,
157
+ sitemap_found: mapResult.sitemap_found
158
+ });
159
+ return {
160
+ urls: mapResult.urls,
161
+ total_found: mapResult.total_found,
162
+ sitemap_found: mapResult.sitemap_found,
163
+ crawled: 0,
164
+ ...mapResult.error ? { error: mapResult.error } : {}
165
+ };
166
+ } catch (err) {
167
+ log.error("Map strategy failed", { url: input.url, error: String(err) });
168
+ return {
169
+ urls: [],
170
+ total_found: 0,
171
+ sitemap_found: false,
172
+ crawled: 0,
173
+ error: err instanceof Error ? err.message : String(err)
174
+ };
175
+ }
96
176
  }
177
+ export {
178
+ handleCrawl
179
+ };
97
180
  //# sourceMappingURL=crawl.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"crawl.js","sourceRoot":"","sources":["../../src/tools/crawl.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;AAElC,MAAM,uBAAuB,GAAG,MAAM,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,KAAiB,EACjB,MAAmB;IAEnB,IAAI,CAAC;QACH,yEAAyE;QACzE,IAAI,KAAK,CAAC,QAAQ,KAAK,KAAK,EAAE,CAAC;YAC7B,OAAO,iBAAiB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QAC1C,CAAC;QAED,MAAM,OAAO,GAAG,KAAK,EAAE,GAAW,EAAE,EAAE,CACpC,WAAW,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,EAAE,MAAM,CAAC,CAAC;QAEzD,MAAM,UAAU,GAAG,KAAK,EAAE,GAAW,EAAE,EAAE,CACvC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QAE3C,MAAM,OAAO,GAAG,IAAI,OAAO,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;QACjD,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAE1C,8EAA8E;QAC9E,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC3C,MAAM,YAAY,GAAG,gBAAgB,CACnC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,EAC/D,MAAM,CACP,CAAC;QAEF,uCAAuC;QACvC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3C,GAAG,IAAI;YACP,QAAQ,EAAE,YAAY,CAAC,CAAC,CAAC,EAAE,QAAQ,IAAI,IAAI,CAAC,QAAQ;SACrD,CAAC,CAAC,CAAC;QAEJ,iCAAiC;QACjC,MAAM,aAAa,GAAG,KAAK,CAAC,eAAe,IAAI,uBAAuB,CAAC;QACvE,MAAM,aAAa,GAAG,EAAE,CAAC;QACzB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,aAAa,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjF,MAAM;YACR,CAAC;YACD,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACzB,SAAS,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACpC,CAAC;QAED,GAAG,CAAC,IAAI,CAAC,gBAAgB,EAAE;YACzB,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,QAAQ,EAAE,aAAa,CAAC,MAAM;YAC9B,UAAU,EAAE,SAAS;SACtB,CAAC,CAAC;QAEH,OAAO;YACL,KA
AK,EAAE,aAAa;YACpB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACjD,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,OAAO;YACL,KAAK,EAAE,EAAE;YACT,WAAW,EAAE,CAAC;YACd,OAAO,EAAE,CAAC;YACV,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,KAAiB,EACjB,MAAmB;IAEnB,MAAM,WAAW,GAAG,KAAK,EAAE,GAAW,EAAE,EAAE;QACxC,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QAC3D,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC;IAChF,CAAC,CAAC;IAEF,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,OAAO,CAC7B;YACE,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;YACxC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;SACzC,EACD,WAAW,CACZ,CAAC;QAEF,GAAG,CAAC,IAAI,CAAC,cAAc,EAAE;YACvB,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,WAAW,EAAE,SAAS,CAAC,WAAW;YAClC,aAAa,EAAE,SAAS,CAAC,aAAa;SACvC,CAAC,CAAC;QAEH,OAAO;YACL,IAAI,EAAE,SAAS,CAAC,IAAI;YACpB,WAAW,EAAE,SAAS,CAAC,WAAW;YAClC,aAAa,EAAE,SAAS,CAAC,aAAa;YACtC,OAAO,EAAE,CAAC;YACV,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACvD,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACzE,OAAO;YACL,IAAI,EAAE,EAAE;YACR,WAAW,EAAE,CAAC;YACd,aAAa,EAAE,KAAK;YACpB,OAAO,EAAE,CAAC;YACV,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;AACH,CAAC"}
1
+ {"version":3,"sources":["../../src/tools/crawl.ts"],"sourcesContent":["import type {\n CrawlInput,\n CrawlOutput,\n MapOutput,\n} from '../types.js';\nimport type { SmartRouter } from '../fetch/router.js';\nimport { Crawler } from '../crawl/crawler.js';\nimport { deduplicatePages } from '../crawl/dedup.js';\nimport { mapUrls } from '../crawl/mapper.js';\nimport { handleFetch } from './fetch.js';\nimport {\n buildEvidenceFromMarkdown,\n applyAggregateMarkdownBudget,\n} from '../search/evidence.js';\nimport { countTokens } from '../search/tokens.js';\nimport { createLogger } from '../logger.js';\n\nconst log = createLogger('crawl');\n\nconst DEFAULT_MAX_TOTAL_CHARS = 100000;\nconst DEFAULT_MAX_TOKENS_OUT = 4000;\n\nexport async function handleCrawl(\n input: CrawlInput,\n router: SmartRouter,\n): Promise<CrawlOutput | (MapOutput & { crawled: number })> {\n try {\n // Map strategy: lightweight URL-only discovery, skip full crawl pipeline\n if (input.strategy === 'map') {\n return handleMapStrategy(input, router);\n }\n\n // Crawler needs full markdown internally for dedup; opt in explicitly so\n // handleFetch's default strip does not steal page bodies mid-crawl.\n const fetchFn = async (url: string) => {\n const r = await handleFetch({ url, use_auth: input.use_auth, include_full_markdown: true }, router);\n if (!r.ok) {\n return {\n url,\n title: '',\n markdown: '',\n metadata: {},\n links: [],\n images: [],\n cached: false,\n error: r.error_reason,\n };\n }\n return r.data;\n };\n\n const rawFetchFn = async (url: string) =>\n router.fetch(url, { renderJs: 'never' });\n\n const crawler = new Crawler(fetchFn, rawFetchFn);\n const result = await crawler.crawl(input);\n\n // Deduplicate cross-page content (pass domain for SQLite boilerplate caching)\n const domain = new URL(input.url).hostname;\n const dedupedPages = deduplicatePages(\n result.pages.map((p) => ({ url: p.url, markdown: p.markdown })),\n domain,\n );\n\n // Apply deduped markdown back to pages\n const 
pages = result.pages.map((page, i) => ({\n ...page,\n markdown: dedupedPages[i]?.markdown ?? page.markdown,\n }));\n\n // Enforce max_total_chars budget\n const maxTotalChars = input.max_total_chars ?? DEFAULT_MAX_TOTAL_CHARS;\n const budgetedPages = [];\n let charCount = 0;\n\n for (const page of pages) {\n if (charCount + page.markdown.length > maxTotalChars && budgetedPages.length > 0) {\n break;\n }\n budgetedPages.push(page);\n charCount += page.markdown.length;\n }\n\n const droppedOverBudget = result.crawled - budgetedPages.length;\n log.info('Crawl complete', {\n url: input.url,\n crawled: result.crawled,\n returned: budgetedPages.length,\n totalChars: charCount,\n droppedOverBudget,\n });\n\n const out: CrawlOutput = {\n pages: budgetedPages,\n total_found: result.total_found,\n crawled: budgetedPages.length,\n ...(droppedOverBudget > 0 ? { dropped_over_budget: droppedOverBudget } : {}),\n ...(result.links ? { links: result.links } : {}),\n };\n\n await attachEvidence(out, input);\n return out;\n } catch (err) {\n log.error('Crawl failed', { url: input.url, error: String(err) });\n return {\n pages: [],\n total_found: 0,\n crawled: 0,\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n\nfunction buildPageExcerpt(markdown: string, maxChars = 600): string {\n if (!markdown) return '';\n const paragraphs = markdown.split(/\\n\\n+/).map((p) => p.trim()).filter(Boolean);\n let out = '';\n for (const p of paragraphs) {\n if (out.length + p.length + 2 > maxChars) {\n const remaining = maxChars - out.length;\n if (remaining > 80) out += (out ? '\\n\\n' : '') + p.slice(0, remaining) + '…';\n break;\n }\n out += (out ? '\\n\\n' : '') + p;\n }\n return out;\n}\n\nasync function attachEvidence(out: CrawlOutput, input: CrawlInput): Promise<void> {\n if (out.pages.length === 0) return;\n const includeFull = input.include_full_markdown ?? false;\n const maxTokensOut = input.max_tokens_out ?? 
DEFAULT_MAX_TOKENS_OUT;\n\n let used = 0;\n for (const page of out.pages) {\n if (!page.markdown) continue;\n const remaining = maxTokensOut - used;\n if (remaining <= 0) break;\n const evs = await buildEvidenceFromMarkdown(\n page.title || page.url,\n page.title,\n page.url,\n page.markdown,\n { maxItems: 1, maxTokensOut: remaining },\n );\n if (evs.length > 0) {\n page.evidence = evs;\n for (const ev of evs) used += countTokens(ev.excerpt);\n }\n }\n\n if (!includeFull) {\n // No full markdown: still surface a short excerpt per page so the\n // result is useful when evidence couldn't be built (no query to highlight).\n for (const page of out.pages) {\n if (!page.evidence || page.evidence.length === 0) {\n const excerpt = buildPageExcerpt(page.markdown);\n if (excerpt) page.excerpt = excerpt;\n }\n page.markdown = '';\n }\n } else {\n applyAggregateMarkdownBudget(\n out.pages,\n (p) => p.markdown ?? '',\n (p, body) => { p.markdown = body; },\n { maxTokensOut },\n );\n }\n}\n\nasync function handleMapStrategy(\n input: CrawlInput,\n router: SmartRouter,\n): Promise<MapOutput & { crawled: number }> {\n const httpFetchFn = async (url: string) => {\n const raw = await router.fetch(url, { renderJs: 'never' });\n return { html: raw.html, finalUrl: raw.finalUrl, statusCode: raw.statusCode };\n };\n\n try {\n const mapResult = await mapUrls(\n {\n url: input.url,\n max_depth: input.max_depth,\n max_pages: input.max_pages,\n include_patterns: input.include_patterns,\n exclude_patterns: input.exclude_patterns,\n },\n httpFetchFn,\n );\n\n log.info('Map complete', {\n url: input.url,\n total_found: mapResult.total_found,\n sitemap_found: mapResult.sitemap_found,\n });\n\n return {\n urls: mapResult.urls,\n total_found: mapResult.total_found,\n sitemap_found: mapResult.sitemap_found,\n crawled: 0,\n ...(mapResult.error ? 
{ error: mapResult.error } : {}),\n };\n } catch (err) {\n log.error('Map strategy failed', { url: input.url, error: String(err) });\n return {\n urls: [],\n total_found: 0,\n sitemap_found: false,\n crawled: 0,\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n"],"mappings":"AAMA,SAAS,eAAe;AACxB,SAAS,wBAAwB;AACjC,SAAS,eAAe;AACxB,SAAS,mBAAmB;AAC5B;AAAA,EACE;AAAA,EACA;AAAA,OACK;AACP,SAAS,mBAAmB;AAC5B,SAAS,oBAAoB;AAE7B,MAAM,MAAM,aAAa,OAAO;AAEhC,MAAM,0BAA0B;AAChC,MAAM,yBAAyB;AAE/B,eAAsB,YACpB,OACA,QAC0D;AAC1D,MAAI;AAEF,QAAI,MAAM,aAAa,OAAO;AAC5B,aAAO,kBAAkB,OAAO,MAAM;AAAA,IACxC;AAIA,UAAM,UAAU,OAAO,QAAgB;AACrC,YAAM,IAAI,MAAM,YAAY,EAAE,KAAK,UAAU,MAAM,UAAU,uBAAuB,KAAK,GAAG,MAAM;AAClG,UAAI,CAAC,EAAE,IAAI;AACT,eAAO;AAAA,UACL;AAAA,UACA,OAAO;AAAA,UACP,UAAU;AAAA,UACV,UAAU,CAAC;AAAA,UACX,OAAO,CAAC;AAAA,UACR,QAAQ,CAAC;AAAA,UACT,QAAQ;AAAA,UACR,OAAO,EAAE;AAAA,QACX;AAAA,MACF;AACA,aAAO,EAAE;AAAA,IACX;AAEA,UAAM,aAAa,OAAO,QACxB,OAAO,MAAM,KAAK,EAAE,UAAU,QAAQ,CAAC;AAEzC,UAAM,UAAU,IAAI,QAAQ,SAAS,UAAU;AAC/C,UAAM,SAAS,MAAM,QAAQ,MAAM,KAAK;AAGxC,UAAM,SAAS,IAAI,IAAI,MAAM,GAAG,EAAE;AAClC,UAAM,eAAe;AAAA,MACnB,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,UAAU,EAAE,SAAS,EAAE;AAAA,MAC9D;AAAA,IACF;AAGA,UAAM,QAAQ,OAAO,MAAM,IAAI,CAAC,MAAM,OAAO;AAAA,MAC3C,GAAG;AAAA,MACH,UAAU,aAAa,CAAC,GAAG,YAAY,KAAK;AAAA,IAC9C,EAAE;AAGF,UAAM,gBAAgB,MAAM,mBAAmB;AAC/C,UAAM,gBAAgB,CAAC;AACvB,QAAI,YAAY;AAEhB,eAAW,QAAQ,OAAO;AACxB,UAAI,YAAY,KAAK,SAAS,SAAS,iBAAiB,cAAc,SAAS,GAAG;AAChF;AAAA,MACF;AACA,oBAAc,KAAK,IAAI;AACvB,mBAAa,KAAK,SAAS;AAAA,IAC7B;AAEA,UAAM,oBAAoB,OAAO,UAAU,cAAc;AACzD,QAAI,KAAK,kBAAkB;AAAA,MACzB,KAAK,MAAM;AAAA,MACX,SAAS,OAAO;AAAA,MAChB,UAAU,cAAc;AAAA,MACxB,YAAY;AAAA,MACZ;AAAA,IACF,CAAC;AAED,UAAM,MAAmB;AAAA,MACvB,OAAO;AAAA,MACP,aAAa,OAAO;AAAA,MACpB,SAAS,cAAc;AAAA,MACvB,GAAI,oBAAoB,IAAI,EAAE,qBAAqB,kBAAkB,IAAI,CAAC;AAAA,MAC1E,GAAI,OAAO,QAAQ,EAAE,OAAO,OAAO,MAAM,IAAI,CAAC;AAAA,IAChD;AAEA,UAAM,eAAe,KAAK,KAAK;AAC/B,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,QAAI,MAAM,gBAAgB,EAAE,KAAK,MAAM,KAAK,OAAO,OAAO,GAA
G,EAAE,CAAC;AAChE,WAAO;AAAA,MACL,OAAO,CAAC;AAAA,MACR,aAAa;AAAA,MACb,SAAS;AAAA,MACT,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAEA,SAAS,iBAAiB,UAAkB,WAAW,KAAa;AAClE,MAAI,CAAC,SAAU,QAAO;AACtB,QAAM,aAAa,SAAS,MAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO,OAAO;AAC9E,MAAI,MAAM;AACV,aAAW,KAAK,YAAY;AAC1B,QAAI,IAAI,SAAS,EAAE,SAAS,IAAI,UAAU;AACxC,YAAM,YAAY,WAAW,IAAI;AACjC,UAAI,YAAY,GAAI,SAAQ,MAAM,SAAS,MAAM,EAAE,MAAM,GAAG,SAAS,IAAI;AACzE;AAAA,IACF;AACA,YAAQ,MAAM,SAAS,MAAM;AAAA,EAC/B;AACA,SAAO;AACT;AAEA,eAAe,eAAe,KAAkB,OAAkC;AAChF,MAAI,IAAI,MAAM,WAAW,EAAG;AAC5B,QAAM,cAAc,MAAM,yBAAyB;AACnD,QAAM,eAAe,MAAM,kBAAkB;AAE7C,MAAI,OAAO;AACX,aAAW,QAAQ,IAAI,OAAO;AAC5B,QAAI,CAAC,KAAK,SAAU;AACpB,UAAM,YAAY,eAAe;AACjC,QAAI,aAAa,EAAG;AACpB,UAAM,MAAM,MAAM;AAAA,MAChB,KAAK,SAAS,KAAK;AAAA,MACnB,KAAK;AAAA,MACL,KAAK;AAAA,MACL,KAAK;AAAA,MACL,EAAE,UAAU,GAAG,cAAc,UAAU;AAAA,IACzC;AACA,QAAI,IAAI,SAAS,GAAG;AAClB,WAAK,WAAW;AAChB,iBAAW,MAAM,IAAK,SAAQ,YAAY,GAAG,OAAO;AAAA,IACtD;AAAA,EACF;AAEA,MAAI,CAAC,aAAa;AAGhB,eAAW,QAAQ,IAAI,OAAO;AAC5B,UAAI,CAAC,KAAK,YAAY,KAAK,SAAS,WAAW,GAAG;AAChD,cAAM,UAAU,iBAAiB,KAAK,QAAQ;AAC9C,YAAI,QAAS,MAAK,UAAU;AAAA,MAC9B;AACA,WAAK,WAAW;AAAA,IAClB;AAAA,EACF,OAAO;AACL;AAAA,MACE,IAAI;AAAA,MACJ,CAAC,MAAM,EAAE,YAAY;AAAA,MACrB,CAAC,GAAG,SAAS;AAAE,UAAE,WAAW;AAAA,MAAM;AAAA,MAClC,EAAE,aAAa;AAAA,IACjB;AAAA,EACF;AACF;AAEA,eAAe,kBACb,OACA,QAC0C;AAC1C,QAAM,cAAc,OAAO,QAAgB;AACzC,UAAM,MAAM,MAAM,OAAO,MAAM,KAAK,EAAE,UAAU,QAAQ,CAAC;AACzD,WAAO,EAAE,MAAM,IAAI,MAAM,UAAU,IAAI,UAAU,YAAY,IAAI,WAAW;AAAA,EAC9E;AAEA,MAAI;AACF,UAAM,YAAY,MAAM;AAAA,MACtB;AAAA,QACE,KAAK,MAAM;AAAA,QACX,WAAW,MAAM;AAAA,QACjB,WAAW,MAAM;AAAA,QACjB,kBAAkB,MAAM;AAAA,QACxB,kBAAkB,MAAM;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,gBAAgB;AAAA,MACvB,KAAK,MAAM;AAAA,MACX,aAAa,UAAU;AAAA,MACvB,eAAe,UAAU;AAAA,IAC3B,CAAC;AAED,WAAO;AAAA,MACL,MAAM,UAAU;AAAA,MAChB,aAAa,UAAU;AAAA,MACvB,eAAe,UAAU;AAAA,MACzB,SAAS;AAAA,MACT,GAAI,UAAU,QAAQ,EAAE,OAAO,UAAU,MAAM,IAAI,CAAC;AAAA,IACtD;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,MAAM,uBAAuB,EAAE,K
AAK,MAAM,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AACvE,WAAO;AAAA,MACL,MAAM,CAAC;AAAA,MACP,aAAa;AAAA,MACb,eAAe;AAAA,MACf,SAAS;AAAA,MACT,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;","names":[]}
@@ -1,4 +1,4 @@
1
- import type { ExtractInput, ExtractOutput } from '../types.js';
1
+ import type { ExtractInput, ExtractOutput, StageResult } from '../types.js';
2
2
  import type { SmartRouter } from '../fetch/router.js';
3
- export declare function handleExtract(input: ExtractInput, router: SmartRouter): Promise<ExtractOutput>;
3
+ export declare function handleExtract(input: ExtractInput, router: SmartRouter): Promise<StageResult<ExtractOutput>>;
4
4
  //# sourceMappingURL=extract.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/tools/extract.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AA8BtD,wBAAsB,aAAa,CACjC,KAAK,EAAE,YAAY,EACnB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,aAAa,CAAC,CAoDxB"}
1
+ {"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/tools/extract.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC5E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AA8CtD,wBAAsB,aAAa,CACjC,KAAK,EAAE,YAAY,EACnB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,WAAW,CAAC,aAAa,CAAC,CAAC,CA+JrC"}
@@ -1,69 +1,185 @@
1
- import { extractMetadata, extractSelector, extractTables } from '../extraction/extract.js';
2
- import { extractWithSchema } from '../extraction/schema.js';
3
- import { extractJsonLd } from '../extraction/jsonld.js';
4
- import { getCachedContent, isExpired } from '../cache/store.js';
5
- import { createLogger } from '../logger.js';
6
- const log = createLogger('extract');
1
+ import { extractMetadata, extractSelector, extractTables } from "../extraction/extract.js";
2
+ import {
3
+ extractWithSchema,
4
+ extractWithSchemaDetailedAsync
5
+ } from "../extraction/schema.js";
6
+ import { extractJsonLd } from "../extraction/jsonld.js";
7
+ import { extractStructured } from "../extraction/structured.js";
8
+ import { getCachedContent, isExpired } from "../cache/store.js";
9
+ import { fetchWithPlaywright } from "../fetch/playwright-tier.js";
10
+ import { createLogger } from "../logger.js";
11
+ import {
12
+ isNamedSchemaType,
13
+ extractNamedSchema,
14
+ NAMED_SCHEMAS
15
+ } from "../extraction/v1/schemas/index.js";
16
+ import { isLocalLlmEnabled, extractWithLocalLlm } from "../extraction/v1/local-llm.js";
17
+ const log = createLogger("extract");
7
18
  async function resolveHtml(input, router) {
8
- if (input.url) {
9
- const cached = getCachedContent(input.url);
10
- if (cached && !isExpired(cached)) {
11
- log.info('Using cached HTML', { url: input.url });
12
- return { html: cached.rawHtml, sourceUrl: cached.url };
13
- }
14
- const raw = await router.fetch(input.url, {
15
- renderJs: 'auto',
16
- useAuth: false,
17
- });
18
- return { html: raw.html, sourceUrl: raw.finalUrl };
19
+ if (input.execution_mode === "stealth" && input.url) {
20
+ const pw = await fetchWithPlaywright(input.url);
21
+ return { html: pw.html, sourceUrl: input.url };
22
+ }
23
+ if (input.url) {
24
+ const cached = getCachedContent(input.url);
25
+ if (cached && !isExpired(cached)) {
26
+ log.info("Using cached HTML", { url: input.url });
27
+ return { html: cached.rawHtml, sourceUrl: cached.url };
19
28
  }
20
- return { html: input.html };
29
+ const raw = await router.fetch(input.url, {
30
+ renderJs: "auto",
31
+ useAuth: false
32
+ });
33
+ return { html: raw.html, sourceUrl: raw.finalUrl };
34
+ }
35
+ return { html: input.html };
21
36
  }
22
- export async function handleExtract(input, router) {
23
- const mode = input.mode ?? 'metadata';
24
- if (!input.url && !input.html) {
25
- return { data: {}, mode, error: 'Either url or html must be provided' };
26
- }
27
- if (mode === 'selector' && !input.css_selector) {
28
- return { data: '', mode, error: 'css_selector is required when mode is "selector"' };
37
+ async function handleExtract(input, router) {
38
+ const mode = input.mode ?? "metadata";
39
+ if (!input.url && !input.html) {
40
+ return {
41
+ ok: false,
42
+ error: "invalid_input",
43
+ error_reason: "Either url or html must be provided",
44
+ stage: "extract"
45
+ };
46
+ }
47
+ if (input.named_schema && input.schema) {
48
+ return {
49
+ ok: false,
50
+ error: "invalid_input",
51
+ error_reason: "schema and named_schema are mutually exclusive",
52
+ stage: "extract"
53
+ };
54
+ }
55
+ if (input.named_schema && !isNamedSchemaType(input.named_schema)) {
56
+ return {
57
+ ok: false,
58
+ error: "invalid_input",
59
+ error_reason: `Unknown named_schema. Valid: ${NAMED_SCHEMAS.join(", ")}`,
60
+ stage: "extract"
61
+ };
62
+ }
63
+ if (mode === "selector" && !input.css_selector) {
64
+ return {
65
+ ok: false,
66
+ error: "invalid_input",
67
+ error_reason: 'css_selector is required when mode is "selector"',
68
+ stage: "extract"
69
+ };
70
+ }
71
+ if (mode === "schema" && !input.named_schema && (!input.schema || !input.schema.properties)) {
72
+ return {
73
+ ok: false,
74
+ error: "invalid_input",
75
+ error_reason: 'schema is required when mode is "schema" and must have properties',
76
+ stage: "extract"
77
+ };
78
+ }
79
+ try {
80
+ const { html, sourceUrl } = await resolveHtml(input, router);
81
+ if (input.named_schema) {
82
+ const namedData = await extractNamedSchema(input.named_schema, html, sourceUrl ?? input.url ?? "");
83
+ if (namedData === null) {
84
+ return {
85
+ ok: true,
86
+ data: {
87
+ data: {},
88
+ source_url: sourceUrl,
89
+ mode: "schema",
90
+ error: `No ${input.named_schema} data found on page`
91
+ }
92
+ };
93
+ }
94
+ return {
95
+ ok: true,
96
+ data: {
97
+ data: namedData,
98
+ source_url: sourceUrl,
99
+ mode: "schema"
100
+ }
101
+ };
29
102
  }
30
- if (mode === 'schema' && (!input.schema || !input.schema.properties)) {
31
- return { data: {}, mode, error: 'schema is required when mode is "schema" and must have properties' };
103
+ if (mode === "schema" && input.schema && isLocalLlmEnabled()) {
104
+ const llmData = await extractWithLocalLlm({
105
+ schema: input.schema,
106
+ html,
107
+ url: sourceUrl ?? input.url ?? ""
108
+ });
109
+ return {
110
+ ok: true,
111
+ data: {
112
+ data: llmData ?? {},
113
+ source_url: sourceUrl,
114
+ mode: "schema"
115
+ }
116
+ };
32
117
  }
33
- try {
34
- const { html, sourceUrl } = await resolveHtml(input, router);
35
- let data;
36
- switch (mode) {
37
- case 'selector':
38
- data = extractSelector(html, input.css_selector, input.multiple ?? false);
39
- break;
40
- case 'tables':
41
- data = extractTables(html);
42
- break;
43
- case 'schema':
44
- data = extractWithSchema(html, input.schema);
45
- break;
46
- case 'metadata':
47
- default: {
48
- const meta = extractMetadata(html);
49
- const jsonld = extractJsonLd(html);
50
- if (jsonld.length > 0) {
51
- meta.jsonld = jsonld;
52
- }
53
- data = meta;
54
- break;
55
- }
118
+ let data;
119
+ switch (mode) {
120
+ case "selector":
121
+ data = extractSelector(html, input.css_selector, input.multiple ?? false);
122
+ break;
123
+ case "tables":
124
+ data = extractTables(html);
125
+ break;
126
+ case "structured":
127
+ data = extractStructured(html);
128
+ break;
129
+ case "schema": {
130
+ const schema = input.schema;
131
+ if (Array.isArray(schema.required) && schema.required.length > 0) {
132
+ const detailed = await extractWithSchemaDetailedAsync(html, schema);
133
+ data = detailed.values;
134
+ if (detailed.warnings.length > 0) {
135
+ return {
136
+ ok: true,
137
+ data: {
138
+ data,
139
+ source_url: sourceUrl,
140
+ mode,
141
+ warnings: detailed.warnings
142
+ }
143
+ };
144
+ }
145
+ } else {
146
+ data = extractWithSchema(html, schema);
147
+ }
148
+ break;
149
+ }
150
+ case "metadata":
151
+ default: {
152
+ const meta = extractMetadata(html);
153
+ const jsonld = extractJsonLd(html);
154
+ if (jsonld.length > 0) {
155
+ meta.jsonld = jsonld;
56
156
  }
57
- return { data, source_url: sourceUrl, mode };
157
+ data = meta;
158
+ break;
159
+ }
58
160
  }
59
- catch (err) {
60
- log.error('Extract failed', { url: input.url, error: String(err) });
61
- return {
62
- data: mode === 'selector' ? '' : mode === 'tables' ? [] : {},
63
- source_url: input.url,
64
- mode,
65
- error: err instanceof Error ? err.message : String(err),
66
- };
161
+ if (mode === "tables" && Array.isArray(data) && data.length === 0) {
162
+ const hint = input.execution_mode === "stealth" ? "no_tables_detected \u2014 page genuinely contains no tables" : 'no_tables_detected \u2014 page may require JavaScript; retry with execution_mode: "stealth"';
163
+ return {
164
+ ok: false,
165
+ error: "no_tables_detected",
166
+ error_reason: "No tables found on page",
167
+ stage: "extract",
168
+ hint
169
+ };
67
170
  }
171
+ return { ok: true, data: { data, source_url: sourceUrl, mode } };
172
+ } catch (err) {
173
+ log.error("Extract failed", { url: input.url, error: String(err) });
174
+ return {
175
+ ok: false,
176
+ error: "extract_failed",
177
+ error_reason: err instanceof Error ? err.message : String(err),
178
+ stage: "extract"
179
+ };
180
+ }
68
181
  }
182
+ export {
183
+ handleExtract
184
+ };
69
185
  //# sourceMappingURL=extract.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/tools/extract.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAC3F,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAEpC,KAAK,UAAU,WAAW,CACxB,KAAmB,EACnB,MAAmB;IAEnB,IAAI,KAAK,CAAC,GAAG,EAAE,CAAC;QACd,MAAM,MAAM,GAAG,gBAAgB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3C,IAAI,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;YACjC,GAAG,CAAC,IAAI,CAAC,mBAAmB,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;YAClD,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC;QACzD,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE;YACxC,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;QACH,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC;IACrD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,IAAK,EAAE,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,KAAmB,EACnB,MAAmB;IAEnB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,UAAU,CAAC;IAEtC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,qCAAqC,EAAE,CAAC;IAC1E,CAAC;IAED,IAAI,IAAI,KAAK,UAAU,IAAI,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC;QAC/C,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,kDAAkD,EAAE,CAAC;IACvF,CAAC;IAED,IAAI,IAAI,KAAK,QAAQ,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;QACrE,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,mEAAmE,EAAE,CAAC;IACxG,CAAC;IAED,IAAI,CAAC;QACH,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,MAAM,WAAW,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QAE7D,IAAI,IAA2B,CAAC;QAEhC,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,UAAU;gBACb,IAAI,GAAG,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC,YAAa,EAAE,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,CAAC;gBAC3E,MAAM;YACR,KAAK,QAAQ;gBACX,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;gBAC3B,MAAM;YACR,KAAK,QAAQ;gBACX,IAAI,GAAG,iB
AAiB,CAAC,IAAI,EAAE,KAAK,CAAC,MAAO,CAAC,CAAC;gBAC9C,MAAM;YACR,KAAK,UAAU,CAAC;YAChB,OAAO,CAAC,CAAC,CAAC;gBACR,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;gBACnC,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;gBACnC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;gBACvB,CAAC;gBACD,IAAI,GAAG,IAAI,CAAC;gBACZ,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IAC/C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,gBAAgB,EAAE,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACpE,OAAO;YACL,IAAI,EAAE,IAAI,KAAK,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;YAC5D,UAAU,EAAE,KAAK,CAAC,GAAG;YACrB,IAAI;YACJ,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;AACH,CAAC"}
1
+ {"version":3,"sources":["../../src/tools/extract.ts"],"sourcesContent":["import type { ExtractInput, ExtractOutput, StageResult } from '../types.js';\nimport type { SmartRouter } from '../fetch/router.js';\nimport { extractMetadata, extractSelector, extractTables } from '../extraction/extract.js';\nimport {\n extractWithSchema,\n extractWithSchemaDetailedAsync,\n} from '../extraction/schema.js';\nimport { extractJsonLd } from '../extraction/jsonld.js';\nimport { extractStructured } from '../extraction/structured.js';\nimport { getCachedContent, isExpired } from '../cache/store.js';\nimport { fetchWithPlaywright } from '../fetch/playwright-tier.js';\nimport { createLogger } from '../logger.js';\nimport {\n isNamedSchemaType,\n extractNamedSchema,\n NAMED_SCHEMAS,\n} from '../extraction/v1/schemas/index.js';\nimport { isLocalLlmEnabled, extractWithLocalLlm } from '../extraction/v1/local-llm.js';\n\nconst log = createLogger('extract');\n\nasync function resolveHtml(\n input: ExtractInput,\n router: SmartRouter,\n): Promise<{ html: string; sourceUrl?: string }> {\n if (input.execution_mode === 'stealth' && input.url) {\n const pw = await fetchWithPlaywright(input.url);\n return { html: pw.html, sourceUrl: input.url };\n }\n\n if (input.url) {\n const cached = getCachedContent(input.url);\n if (cached && !isExpired(cached)) {\n log.info('Using cached HTML', { url: input.url });\n return { html: cached.rawHtml, sourceUrl: cached.url };\n }\n\n const raw = await router.fetch(input.url, {\n renderJs: 'auto',\n useAuth: false,\n });\n return { html: raw.html, sourceUrl: raw.finalUrl };\n }\n\n return { html: input.html! };\n}\n\nexport async function handleExtract(\n input: ExtractInput,\n router: SmartRouter,\n): Promise<StageResult<ExtractOutput>> {\n const mode = input.mode ?? 
'metadata';\n\n if (!input.url && !input.html) {\n return {\n ok: false,\n error: 'invalid_input',\n error_reason: 'Either url or html must be provided',\n stage: 'extract',\n };\n }\n\n if (input.named_schema && input.schema) {\n return {\n ok: false,\n error: 'invalid_input',\n error_reason: 'schema and named_schema are mutually exclusive',\n stage: 'extract',\n };\n }\n\n if (input.named_schema && !isNamedSchemaType(input.named_schema)) {\n return {\n ok: false,\n error: 'invalid_input',\n error_reason: `Unknown named_schema. Valid: ${NAMED_SCHEMAS.join(', ')}`,\n stage: 'extract',\n };\n }\n\n if (mode === 'selector' && !input.css_selector) {\n return {\n ok: false,\n error: 'invalid_input',\n error_reason: 'css_selector is required when mode is \"selector\"',\n stage: 'extract',\n };\n }\n\n if (mode === 'schema' && !input.named_schema && (!input.schema || !input.schema.properties)) {\n return {\n ok: false,\n error: 'invalid_input',\n error_reason: 'schema is required when mode is \"schema\" and must have properties',\n stage: 'extract',\n };\n }\n\n try {\n const { html, sourceUrl } = await resolveHtml(input, router);\n\n if (input.named_schema) {\n const namedData = await extractNamedSchema(input.named_schema, html, sourceUrl ?? input.url ?? '');\n if (namedData === null) {\n return {\n ok: true,\n data: {\n data: {},\n source_url: sourceUrl,\n mode: 'schema',\n error: `No ${input.named_schema} data found on page`,\n },\n };\n }\n return {\n ok: true,\n data: {\n data: namedData as unknown as Record<string, unknown>,\n source_url: sourceUrl,\n mode: 'schema',\n },\n };\n }\n\n if (mode === 'schema' && input.schema && isLocalLlmEnabled()) {\n const llmData = await extractWithLocalLlm({\n schema: input.schema as unknown as Record<string, unknown>,\n html,\n url: sourceUrl ?? input.url ?? '',\n });\n return {\n ok: true,\n data: {\n data: (llmData ?? 
{}) as Record<string, unknown>,\n source_url: sourceUrl,\n mode: 'schema',\n },\n };\n }\n\n let data: ExtractOutput['data'];\n\n switch (mode) {\n case 'selector':\n data = extractSelector(html, input.css_selector!, input.multiple ?? false);\n break;\n case 'tables':\n data = extractTables(html);\n break;\n case 'structured':\n data = extractStructured(html);\n break;\n case 'schema': {\n const schema = input.schema!;\n if (Array.isArray(schema.required) && schema.required.length > 0) {\n const detailed = await extractWithSchemaDetailedAsync(html, schema);\n data = detailed.values;\n if (detailed.warnings.length > 0) {\n return {\n ok: true,\n data: {\n data,\n source_url: sourceUrl,\n mode,\n warnings: detailed.warnings,\n },\n };\n }\n } else {\n data = extractWithSchema(html, schema);\n }\n break;\n }\n case 'metadata':\n default: {\n const meta = extractMetadata(html);\n const jsonld = extractJsonLd(html);\n if (jsonld.length > 0) {\n meta.jsonld = jsonld;\n }\n data = meta;\n break;\n }\n }\n\n if (mode === 'tables' && Array.isArray(data) && data.length === 0) {\n const hint =\n input.execution_mode === 'stealth'\n ? 'no_tables_detected — page genuinely contains no tables'\n : 'no_tables_detected — page may require JavaScript; retry with execution_mode: \"stealth\"';\n return {\n ok: false,\n error: 'no_tables_detected',\n error_reason: 'No tables found on page',\n stage: 'extract',\n hint,\n };\n }\n\n return { ok: true, data: { data, source_url: sourceUrl, mode } };\n } catch (err) {\n log.error('Extract failed', { url: input.url, error: String(err) });\n return {\n ok: false,\n error: 'extract_failed',\n error_reason: err instanceof Error ? 
err.message : String(err),\n stage: 'extract',\n };\n }\n}\n"],"mappings":"AAEA,SAAS,iBAAiB,iBAAiB,qBAAqB;AAChE;AAAA,EACE;AAAA,EACA;AAAA,OACK;AACP,SAAS,qBAAqB;AAC9B,SAAS,yBAAyB;AAClC,SAAS,kBAAkB,iBAAiB;AAC5C,SAAS,2BAA2B;AACpC,SAAS,oBAAoB;AAC7B;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,mBAAmB,2BAA2B;AAEvD,MAAM,MAAM,aAAa,SAAS;AAElC,eAAe,YACb,OACA,QAC+C;AAC/C,MAAI,MAAM,mBAAmB,aAAa,MAAM,KAAK;AACnD,UAAM,KAAK,MAAM,oBAAoB,MAAM,GAAG;AAC9C,WAAO,EAAE,MAAM,GAAG,MAAM,WAAW,MAAM,IAAI;AAAA,EAC/C;AAEA,MAAI,MAAM,KAAK;AACb,UAAM,SAAS,iBAAiB,MAAM,GAAG;AACzC,QAAI,UAAU,CAAC,UAAU,MAAM,GAAG;AAChC,UAAI,KAAK,qBAAqB,EAAE,KAAK,MAAM,IAAI,CAAC;AAChD,aAAO,EAAE,MAAM,OAAO,SAAS,WAAW,OAAO,IAAI;AAAA,IACvD;AAEA,UAAM,MAAM,MAAM,OAAO,MAAM,MAAM,KAAK;AAAA,MACxC,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AACD,WAAO,EAAE,MAAM,IAAI,MAAM,WAAW,IAAI,SAAS;AAAA,EACnD;AAEA,SAAO,EAAE,MAAM,MAAM,KAAM;AAC7B;AAEA,eAAsB,cACpB,OACA,QACqC;AACrC,QAAM,OAAO,MAAM,QAAQ;AAE3B,MAAI,CAAC,MAAM,OAAO,CAAC,MAAM,MAAM;AAC7B,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,OAAO;AAAA,MACP,cAAc;AAAA,MACd,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI,MAAM,gBAAgB,MAAM,QAAQ;AACtC,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,OAAO;AAAA,MACP,cAAc;AAAA,MACd,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI,MAAM,gBAAgB,CAAC,kBAAkB,MAAM,YAAY,GAAG;AAChE,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,OAAO;AAAA,MACP,cAAc,gCAAgC,cAAc,KAAK,IAAI,CAAC;AAAA,MACtE,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI,SAAS,cAAc,CAAC,MAAM,cAAc;AAC9C,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,OAAO;AAAA,MACP,cAAc;AAAA,MACd,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI,SAAS,YAAY,CAAC,MAAM,iBAAiB,CAAC,MAAM,UAAU,CAAC,MAAM,OAAO,aAAa;AAC3F,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,OAAO;AAAA,MACP,cAAc;AAAA,MACd,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI;AACF,UAAM,EAAE,MAAM,UAAU,IAAI,MAAM,YAAY,OAAO,MAAM;AAE3D,QAAI,MAAM,cAAc;AACtB,YAAM,YAAY,MAAM,mBAAmB,MAAM,cAAc,MAAM,aAAa,MAAM,OAAO,EAAE;AACjG,UAAI,cAAc,MAAM;AACtB,eAAO;AAAA,UACL,IAAI;AAAA,UACJ,MAAM;AAAA,YACJ,MAAM,CAAC;AAAA,YACP,YAAY;AAAA,YACZ,MAAM;AAAA,YACN,OAAO,MAAM,MAAM,YAAY;AAAA,UACjC;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,QACL,IAAI;AAAA,QACJ,MAAM;AAAA,UACJ,MA
AM;AAAA,UACN,YAAY;AAAA,UACZ,MAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,QAAI,SAAS,YAAY,MAAM,UAAU,kBAAkB,GAAG;AAC5D,YAAM,UAAU,MAAM,oBAAoB;AAAA,QACxC,QAAQ,MAAM;AAAA,QACd;AAAA,QACA,KAAK,aAAa,MAAM,OAAO;AAAA,MACjC,CAAC;AACD,aAAO;AAAA,QACL,IAAI;AAAA,QACJ,MAAM;AAAA,UACJ,MAAO,WAAW,CAAC;AAAA,UACnB,YAAY;AAAA,UACZ,MAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,QAAI;AAEJ,YAAQ,MAAM;AAAA,MACZ,KAAK;AACH,eAAO,gBAAgB,MAAM,MAAM,cAAe,MAAM,YAAY,KAAK;AACzE;AAAA,MACF,KAAK;AACH,eAAO,cAAc,IAAI;AACzB;AAAA,MACF,KAAK;AACH,eAAO,kBAAkB,IAAI;AAC7B;AAAA,MACF,KAAK,UAAU;AACb,cAAM,SAAS,MAAM;AACrB,YAAI,MAAM,QAAQ,OAAO,QAAQ,KAAK,OAAO,SAAS,SAAS,GAAG;AAChE,gBAAM,WAAW,MAAM,+BAA+B,MAAM,MAAM;AAClE,iBAAO,SAAS;AAChB,cAAI,SAAS,SAAS,SAAS,GAAG;AAChC,mBAAO;AAAA,cACL,IAAI;AAAA,cACJ,MAAM;AAAA,gBACJ;AAAA,gBACA,YAAY;AAAA,gBACZ;AAAA,gBACA,UAAU,SAAS;AAAA,cACrB;AAAA,YACF;AAAA,UACF;AAAA,QACF,OAAO;AACL,iBAAO,kBAAkB,MAAM,MAAM;AAAA,QACvC;AACA;AAAA,MACF;AAAA,MACA,KAAK;AAAA,MACL,SAAS;AACP,cAAM,OAAO,gBAAgB,IAAI;AACjC,cAAM,SAAS,cAAc,IAAI;AACjC,YAAI,OAAO,SAAS,GAAG;AACrB,eAAK,SAAS;AAAA,QAChB;AACA,eAAO;AACP;AAAA,MACF;AAAA,IACF;AAEA,QAAI,SAAS,YAAY,MAAM,QAAQ,IAAI,KAAK,KAAK,WAAW,GAAG;AACjE,YAAM,OACJ,MAAM,mBAAmB,YACrB,gEACA;AACN,aAAO;AAAA,QACL,IAAI;AAAA,QACJ,OAAO;AAAA,QACP,cAAc;AAAA,QACd,OAAO;AAAA,QACP;AAAA,MACF;AAAA,IACF;AAEA,WAAO,EAAE,IAAI,MAAM,MAAM,EAAE,MAAM,YAAY,WAAW,KAAK,EAAE;AAAA,EACjE,SAAS,KAAK;AACZ,QAAI,MAAM,kBAAkB,EAAE,KAAK,MAAM,KAAK,OAAO,OAAO,GAAG,EAAE,CAAC;AAClE,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,OAAO;AAAA,MACP,cAAc,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MAC7D,OAAO;AAAA,IACT;AAAA,EACF;AACF;","names":[]}
@@ -1,4 +1,4 @@
1
- import type { FetchInput, FetchOutput } from '../types.js';
1
+ import type { FetchInput, FetchOutput, StageResult } from '../types.js';
2
2
  import type { SmartRouter } from '../fetch/router.js';
3
- export declare function handleFetch(input: FetchInput, router: SmartRouter): Promise<FetchOutput>;
3
+ export declare function handleFetch(input: FetchInput, router: SmartRouter): Promise<StageResult<FetchOutput>>;
4
4
  //# sourceMappingURL=fetch.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../../src/tools/fetch.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAiB,MAAM,aAAa,CAAC;AAC1E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAoCtD,wBAAsB,WAAW,CAC/B,KAAK,EAAE,UAAU,EACjB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,WAAW,CAAC,CAgDtB"}
1
+ {"version":3,"file":"fetch.d.ts","sourceRoot":"","sources":["../../src/tools/fetch.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAiB,WAAW,EAAE,MAAM,aAAa,CAAC;AAEvF,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AA6FtD,wBAAsB,WAAW,CAC/B,KAAK,EAAE,UAAU,EACjB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAsHnC"}