@staticn0va/wigolo 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1003)
  1. package/LICENSE +1 -1
  2. package/README.md +146 -227
  3. package/SKILL.md +382 -0
  4. package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
  5. package/assets/blocks/claude-code/wigolo-command.md +40 -0
  6. package/assets/blocks/cursor/wigolo.mdc +46 -0
  7. package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
  8. package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
  9. package/assets/skills/wigolo/SKILL.md +50 -0
  10. package/assets/skills/wigolo/rules/cache-first.md +30 -0
  11. package/assets/skills/wigolo/rules/synthesis.md +43 -0
  12. package/assets/skills/wigolo-agent/SKILL.md +73 -0
  13. package/assets/skills/wigolo-crawl/SKILL.md +60 -0
  14. package/assets/skills/wigolo-extract/SKILL.md +59 -0
  15. package/assets/skills/wigolo-fetch/SKILL.md +65 -0
  16. package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
  17. package/assets/skills/wigolo-research/SKILL.md +77 -0
  18. package/assets/skills/wigolo-search/SKILL.md +78 -0
  19. package/dist/agent/executor.d.ts +33 -0
  20. package/dist/agent/executor.d.ts.map +1 -0
  21. package/dist/agent/executor.js +233 -0
  22. package/dist/agent/executor.js.map +1 -0
  23. package/dist/agent/pipeline.d.ts +5 -0
  24. package/dist/agent/pipeline.d.ts.map +1 -0
  25. package/dist/agent/pipeline.js +238 -0
  26. package/dist/agent/pipeline.js.map +1 -0
  27. package/dist/agent/planner.d.ts +13 -0
  28. package/dist/agent/planner.d.ts.map +1 -0
  29. package/dist/agent/planner.js +271 -0
  30. package/dist/agent/planner.js.map +1 -0
  31. package/dist/agent/relevance.d.ts +15 -0
  32. package/dist/agent/relevance.d.ts.map +1 -0
  33. package/dist/agent/relevance.js +60 -0
  34. package/dist/agent/relevance.js.map +1 -0
  35. package/dist/cache/backfill-embeddings.d.ts +23 -0
  36. package/dist/cache/backfill-embeddings.d.ts.map +1 -0
  37. package/dist/cache/backfill-embeddings.js +105 -0
  38. package/dist/cache/backfill-embeddings.js.map +1 -0
  39. package/dist/cache/change-detector.d.ts +7 -0
  40. package/dist/cache/change-detector.d.ts.map +1 -0
  41. package/dist/cache/change-detector.js +43 -0
  42. package/dist/cache/change-detector.js.map +1 -0
  43. package/dist/cache/db.d.ts +1 -0
  44. package/dist/cache/db.d.ts.map +1 -1
  45. package/dist/cache/db.js +94 -22
  46. package/dist/cache/db.js.map +1 -1
  47. package/dist/cache/diff-summary.d.ts +2 -0
  48. package/dist/cache/diff-summary.d.ts.map +1 -0
  49. package/dist/cache/diff-summary.js +82 -0
  50. package/dist/cache/diff-summary.js.map +1 -0
  51. package/dist/cache/migrations/runner.d.ts +29 -0
  52. package/dist/cache/migrations/runner.d.ts.map +1 -0
  53. package/dist/cache/migrations/runner.js +147 -0
  54. package/dist/cache/migrations/runner.js.map +1 -0
  55. package/dist/cache/sqlite-vec-store.d.ts +42 -0
  56. package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
  57. package/dist/cache/sqlite-vec-store.js +176 -0
  58. package/dist/cache/sqlite-vec-store.js.map +1 -0
  59. package/dist/cache/store.d.ts +47 -1
  60. package/dist/cache/store.d.ts.map +1 -1
  61. package/dist/cache/store.js +364 -168
  62. package/dist/cache/store.js.map +1 -1
  63. package/dist/cli/agents/antigravity.d.ts +20 -0
  64. package/dist/cli/agents/antigravity.d.ts.map +1 -0
  65. package/dist/cli/agents/antigravity.js +49 -0
  66. package/dist/cli/agents/antigravity.js.map +1 -0
  67. package/dist/cli/agents/claude-code.d.ts +25 -0
  68. package/dist/cli/agents/claude-code.d.ts.map +1 -0
  69. package/dist/cli/agents/claude-code.js +111 -0
  70. package/dist/cli/agents/claude-code.js.map +1 -0
  71. package/dist/cli/agents/cursor.d.ts +21 -0
  72. package/dist/cli/agents/cursor.d.ts.map +1 -0
  73. package/dist/cli/agents/cursor.js +58 -0
  74. package/dist/cli/agents/cursor.js.map +1 -0
  75. package/dist/cli/agents/gemini-cli.d.ts +21 -0
  76. package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
  77. package/dist/cli/agents/gemini-cli.js +55 -0
  78. package/dist/cli/agents/gemini-cli.js.map +1 -0
  79. package/dist/cli/agents/registry.d.ts +21 -0
  80. package/dist/cli/agents/registry.d.ts.map +1 -0
  81. package/dist/cli/agents/registry.js +27 -0
  82. package/dist/cli/agents/registry.js.map +1 -0
  83. package/dist/cli/agents/utils.d.ts +26 -0
  84. package/dist/cli/agents/utils.d.ts.map +1 -0
  85. package/dist/cli/agents/utils.js +136 -0
  86. package/dist/cli/agents/utils.js.map +1 -0
  87. package/dist/cli/agents/vscode.d.ts +21 -0
  88. package/dist/cli/agents/vscode.d.ts.map +1 -0
  89. package/dist/cli/agents/vscode.js +62 -0
  90. package/dist/cli/agents/vscode.js.map +1 -0
  91. package/dist/cli/auth.d.ts +2 -0
  92. package/dist/cli/auth.d.ts.map +1 -0
  93. package/dist/cli/auth.js +94 -0
  94. package/dist/cli/auth.js.map +1 -0
  95. package/dist/cli/backfill.d.ts +2 -0
  96. package/dist/cli/backfill.d.ts.map +1 -0
  97. package/dist/cli/backfill.js +58 -0
  98. package/dist/cli/backfill.js.map +1 -0
  99. package/dist/cli/daemon.d.ts +6 -1
  100. package/dist/cli/daemon.d.ts.map +1 -1
  101. package/dist/cli/daemon.js +61 -3
  102. package/dist/cli/daemon.js.map +1 -1
  103. package/dist/cli/doctor.d.ts +8 -0
  104. package/dist/cli/doctor.d.ts.map +1 -0
  105. package/dist/cli/doctor.js +344 -0
  106. package/dist/cli/doctor.js.map +1 -0
  107. package/dist/cli/health.d.ts +1 -1
  108. package/dist/cli/health.d.ts.map +1 -1
  109. package/dist/cli/health.js +42 -3
  110. package/dist/cli/health.js.map +1 -1
  111. package/dist/cli/help.d.ts +6 -0
  112. package/dist/cli/help.d.ts.map +1 -0
  113. package/dist/cli/help.js +63 -0
  114. package/dist/cli/help.js.map +1 -0
  115. package/dist/cli/index.d.ts +1 -1
  116. package/dist/cli/index.d.ts.map +1 -1
  117. package/dist/cli/index.js +35 -7
  118. package/dist/cli/index.js.map +1 -1
  119. package/dist/cli/init.d.ts +2 -0
  120. package/dist/cli/init.d.ts.map +1 -0
  121. package/dist/cli/init.js +201 -0
  122. package/dist/cli/init.js.map +1 -0
  123. package/dist/cli/plugin.d.ts +5 -0
  124. package/dist/cli/plugin.d.ts.map +1 -0
  125. package/dist/cli/plugin.js +185 -0
  126. package/dist/cli/plugin.js.map +1 -0
  127. package/dist/cli/setup-mcp.d.ts +2 -0
  128. package/dist/cli/setup-mcp.d.ts.map +1 -0
  129. package/dist/cli/setup-mcp.js +114 -0
  130. package/dist/cli/setup-mcp.js.map +1 -0
  131. package/dist/cli/shell.d.ts +2 -0
  132. package/dist/cli/shell.d.ts.map +1 -0
  133. package/dist/cli/shell.js +86 -0
  134. package/dist/cli/shell.js.map +1 -0
  135. package/dist/cli/shutdown.d.ts +2 -0
  136. package/dist/cli/shutdown.d.ts.map +1 -0
  137. package/dist/cli/shutdown.js +26 -0
  138. package/dist/cli/shutdown.js.map +1 -0
  139. package/dist/cli/status.d.ts +2 -0
  140. package/dist/cli/status.d.ts.map +1 -0
  141. package/dist/cli/status.js +31 -0
  142. package/dist/cli/status.js.map +1 -0
  143. package/dist/cli/telemetry.d.ts +10 -0
  144. package/dist/cli/telemetry.d.ts.map +1 -0
  145. package/dist/cli/telemetry.js +56 -0
  146. package/dist/cli/telemetry.js.map +1 -0
  147. package/dist/cli/tui/agents-types.d.ts +28 -0
  148. package/dist/cli/tui/agents-types.d.ts.map +1 -0
  149. package/dist/cli/tui/agents-types.js +1 -0
  150. package/dist/cli/tui/agents-types.js.map +1 -0
  151. package/dist/cli/tui/agents.d.ts +11 -0
  152. package/dist/cli/tui/agents.d.ts.map +1 -0
  153. package/dist/cli/tui/agents.js +93 -0
  154. package/dist/cli/tui/agents.js.map +1 -0
  155. package/dist/cli/tui/banner.d.ts +3 -0
  156. package/dist/cli/tui/banner.d.ts.map +1 -0
  157. package/dist/cli/tui/banner.js +30 -0
  158. package/dist/cli/tui/banner.js.map +1 -0
  159. package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
  160. package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
  161. package/dist/cli/tui/components/AgentSelect.js +116 -0
  162. package/dist/cli/tui/components/AgentSelect.js.map +1 -0
  163. package/dist/cli/tui/components/Banner.d.ts +6 -0
  164. package/dist/cli/tui/components/Banner.d.ts.map +1 -0
  165. package/dist/cli/tui/components/Banner.js +25 -0
  166. package/dist/cli/tui/components/Banner.js.map +1 -0
  167. package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
  168. package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
  169. package/dist/cli/tui/components/BrowserSelect.js +19 -0
  170. package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
  171. package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
  172. package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
  173. package/dist/cli/tui/components/InstallProgress.js +67 -0
  174. package/dist/cli/tui/components/InstallProgress.js.map +1 -0
  175. package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
  176. package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
  177. package/dist/cli/tui/components/SkillInstall.js +94 -0
  178. package/dist/cli/tui/components/SkillInstall.js.map +1 -0
  179. package/dist/cli/tui/components/Summary.d.ts +22 -0
  180. package/dist/cli/tui/components/Summary.d.ts.map +1 -0
  181. package/dist/cli/tui/components/Summary.js +135 -0
  182. package/dist/cli/tui/components/Summary.js.map +1 -0
  183. package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
  184. package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
  185. package/dist/cli/tui/components/SystemCheck.js +71 -0
  186. package/dist/cli/tui/components/SystemCheck.js.map +1 -0
  187. package/dist/cli/tui/components/Verification.d.ts +8 -0
  188. package/dist/cli/tui/components/Verification.d.ts.map +1 -0
  189. package/dist/cli/tui/components/Verification.js +63 -0
  190. package/dist/cli/tui/components/Verification.js.map +1 -0
  191. package/dist/cli/tui/config-writer-cli.d.ts +12 -0
  192. package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
  193. package/dist/cli/tui/config-writer-cli.js +39 -0
  194. package/dist/cli/tui/config-writer-cli.js.map +1 -0
  195. package/dist/cli/tui/config-writer-json.d.ts +16 -0
  196. package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
  197. package/dist/cli/tui/config-writer-json.js +86 -0
  198. package/dist/cli/tui/config-writer-json.js.map +1 -0
  199. package/dist/cli/tui/config-writer-toml.d.ts +16 -0
  200. package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
  201. package/dist/cli/tui/config-writer-toml.js +83 -0
  202. package/dist/cli/tui/config-writer-toml.js.map +1 -0
  203. package/dist/cli/tui/config-writer.d.ts +25 -0
  204. package/dist/cli/tui/config-writer.d.ts.map +1 -0
  205. package/dist/cli/tui/config-writer.js +101 -0
  206. package/dist/cli/tui/config-writer.js.map +1 -0
  207. package/dist/cli/tui/detect-helpers.d.ts +6 -0
  208. package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
  209. package/dist/cli/tui/detect-helpers.js +45 -0
  210. package/dist/cli/tui/detect-helpers.js.map +1 -0
  211. package/dist/cli/tui/extras-prompt.d.ts +7 -0
  212. package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
  213. package/dist/cli/tui/extras-prompt.js +42 -0
  214. package/dist/cli/tui/extras-prompt.js.map +1 -0
  215. package/dist/cli/tui/flags-types.d.ts +19 -0
  216. package/dist/cli/tui/flags-types.d.ts.map +1 -0
  217. package/dist/cli/tui/flags-types.js +23 -0
  218. package/dist/cli/tui/flags-types.js.map +1 -0
  219. package/dist/cli/tui/flags.d.ts +5 -0
  220. package/dist/cli/tui/flags.d.ts.map +1 -0
  221. package/dist/cli/tui/flags.js +132 -0
  222. package/dist/cli/tui/flags.js.map +1 -0
  223. package/dist/cli/tui/format.d.ts +14 -0
  224. package/dist/cli/tui/format.d.ts.map +1 -0
  225. package/dist/cli/tui/format.js +37 -0
  226. package/dist/cli/tui/format.js.map +1 -0
  227. package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
  228. package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
  229. package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
  230. package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
  231. package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
  232. package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
  233. package/dist/cli/tui/hooks/useInstall.js +90 -0
  234. package/dist/cli/tui/hooks/useInstall.js.map +1 -0
  235. package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
  236. package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
  237. package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
  238. package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
  239. package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
  240. package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
  241. package/dist/cli/tui/hooks/useVerify.js +71 -0
  242. package/dist/cli/tui/hooks/useVerify.js.map +1 -0
  243. package/dist/cli/tui/ink-init.d.ts +2 -0
  244. package/dist/cli/tui/ink-init.d.ts.map +1 -0
  245. package/dist/cli/tui/ink-init.js +198 -0
  246. package/dist/cli/tui/ink-init.js.map +1 -0
  247. package/dist/cli/tui/reporter-auto.d.ts +7 -0
  248. package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
  249. package/dist/cli/tui/reporter-auto.js +15 -0
  250. package/dist/cli/tui/reporter-auto.js.map +1 -0
  251. package/dist/cli/tui/reporter.d.ts +26 -0
  252. package/dist/cli/tui/reporter.d.ts.map +1 -0
  253. package/dist/cli/tui/reporter.js +32 -0
  254. package/dist/cli/tui/reporter.js.map +1 -0
  255. package/dist/cli/tui/run-command.d.ts +14 -0
  256. package/dist/cli/tui/run-command.d.ts.map +1 -0
  257. package/dist/cli/tui/run-command.js +72 -0
  258. package/dist/cli/tui/run-command.js.map +1 -0
  259. package/dist/cli/tui/select-agents.d.ts +6 -0
  260. package/dist/cli/tui/select-agents.d.ts.map +1 -0
  261. package/dist/cli/tui/select-agents.js +32 -0
  262. package/dist/cli/tui/select-agents.js.map +1 -0
  263. package/dist/cli/tui/status-agents.d.ts +11 -0
  264. package/dist/cli/tui/status-agents.d.ts.map +1 -0
  265. package/dist/cli/tui/status-agents.js +53 -0
  266. package/dist/cli/tui/status-agents.js.map +1 -0
  267. package/dist/cli/tui/status-cache.d.ts +6 -0
  268. package/dist/cli/tui/status-cache.d.ts.map +1 -0
  269. package/dist/cli/tui/status-cache.js +39 -0
  270. package/dist/cli/tui/status-cache.js.map +1 -0
  271. package/dist/cli/tui/status-format.d.ts +14 -0
  272. package/dist/cli/tui/status-format.d.ts.map +1 -0
  273. package/dist/cli/tui/status-format.js +41 -0
  274. package/dist/cli/tui/status-format.js.map +1 -0
  275. package/dist/cli/tui/status-python.d.ts +6 -0
  276. package/dist/cli/tui/status-python.d.ts.map +1 -0
  277. package/dist/cli/tui/status-python.js +30 -0
  278. package/dist/cli/tui/status-python.js.map +1 -0
  279. package/dist/cli/tui/system-check.d.ts +24 -0
  280. package/dist/cli/tui/system-check.d.ts.map +1 -0
  281. package/dist/cli/tui/system-check.js +103 -0
  282. package/dist/cli/tui/system-check.js.map +1 -0
  283. package/dist/cli/tui/tui-reporter.d.ts +19 -0
  284. package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
  285. package/dist/cli/tui/tui-reporter.js +95 -0
  286. package/dist/cli/tui/tui-reporter.js.map +1 -0
  287. package/dist/cli/tui/utils/config-writer.d.ts +3 -0
  288. package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
  289. package/dist/cli/tui/utils/config-writer.js +22 -0
  290. package/dist/cli/tui/utils/config-writer.js.map +1 -0
  291. package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
  292. package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
  293. package/dist/cli/tui/utils/suppress-logs.js +11 -0
  294. package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
  295. package/dist/cli/tui/verify-suggestions.d.ts +5 -0
  296. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
  297. package/dist/cli/tui/verify-suggestions.js +20 -0
  298. package/dist/cli/tui/verify-suggestions.js.map +1 -0
  299. package/dist/cli/tui/verify.d.ts +14 -0
  300. package/dist/cli/tui/verify.d.ts.map +1 -0
  301. package/dist/cli/tui/verify.js +101 -0
  302. package/dist/cli/tui/verify.js.map +1 -0
  303. package/dist/cli/tui/version.d.ts +2 -0
  304. package/dist/cli/tui/version.d.ts.map +1 -0
  305. package/dist/cli/tui/version.js +14 -0
  306. package/dist/cli/tui/version.js.map +1 -0
  307. package/dist/cli/uninstall.d.ts +2 -0
  308. package/dist/cli/uninstall.d.ts.map +1 -0
  309. package/dist/cli/uninstall.js +57 -0
  310. package/dist/cli/uninstall.js.map +1 -0
  311. package/dist/cli/warmup.d.ts +10 -2
  312. package/dist/cli/warmup.d.ts.map +1 -1
  313. package/dist/cli/warmup.js +226 -93
  314. package/dist/cli/warmup.js.map +1 -1
  315. package/dist/config.d.ts +28 -2
  316. package/dist/config.d.ts.map +1 -1
  317. package/dist/config.js +106 -56
  318. package/dist/config.js.map +1 -1
  319. package/dist/crawl/crawler.d.ts +6 -0
  320. package/dist/crawl/crawler.d.ts.map +1 -1
  321. package/dist/crawl/crawler.js +210 -209
  322. package/dist/crawl/crawler.js.map +1 -1
  323. package/dist/crawl/dedup.d.ts +1 -0
  324. package/dist/crawl/dedup.d.ts.map +1 -1
  325. package/dist/crawl/dedup.js +124 -81
  326. package/dist/crawl/dedup.js.map +1 -1
  327. package/dist/crawl/etag-incremental.d.ts +43 -0
  328. package/dist/crawl/etag-incremental.d.ts.map +1 -0
  329. package/dist/crawl/etag-incremental.js +94 -0
  330. package/dist/crawl/etag-incremental.js.map +1 -0
  331. package/dist/crawl/index-to-vec.d.ts +10 -0
  332. package/dist/crawl/index-to-vec.d.ts.map +1 -0
  333. package/dist/crawl/index-to-vec.js +44 -0
  334. package/dist/crawl/index-to-vec.js.map +1 -0
  335. package/dist/crawl/mapper.js +136 -164
  336. package/dist/crawl/mapper.js.map +1 -1
  337. package/dist/crawl/rate-limiter.js +63 -66
  338. package/dist/crawl/rate-limiter.js.map +1 -1
  339. package/dist/crawl/robots.js +58 -57
  340. package/dist/crawl/robots.js.map +1 -1
  341. package/dist/crawl/sitemap-first.d.ts +12 -0
  342. package/dist/crawl/sitemap-first.d.ts.map +1 -0
  343. package/dist/crawl/sitemap-first.js +47 -0
  344. package/dist/crawl/sitemap-first.js.map +1 -0
  345. package/dist/crawl/sitemap.js +33 -32
  346. package/dist/crawl/sitemap.js.map +1 -1
  347. package/dist/crawl/url-utils.d.ts +1 -0
  348. package/dist/crawl/url-utils.d.ts.map +1 -1
  349. package/dist/crawl/url-utils.js +49 -37
  350. package/dist/crawl/url-utils.js.map +1 -1
  351. package/dist/daemon/health-check.d.ts +16 -0
  352. package/dist/daemon/health-check.d.ts.map +1 -0
  353. package/dist/daemon/health-check.js +33 -0
  354. package/dist/daemon/health-check.js.map +1 -0
  355. package/dist/daemon/http-server.d.ts +26 -0
  356. package/dist/daemon/http-server.d.ts.map +1 -0
  357. package/dist/daemon/http-server.js +275 -0
  358. package/dist/daemon/http-server.js.map +1 -0
  359. package/dist/daemon/proxy.d.ts +10 -0
  360. package/dist/daemon/proxy.d.ts.map +1 -0
  361. package/dist/daemon/proxy.js +93 -0
  362. package/dist/daemon/proxy.js.map +1 -0
  363. package/dist/embedding/embed.d.ts +59 -0
  364. package/dist/embedding/embed.d.ts.map +1 -0
  365. package/dist/embedding/embed.js +233 -0
  366. package/dist/embedding/embed.js.map +1 -0
  367. package/dist/embedding/fastembed-provider.d.ts +19 -0
  368. package/dist/embedding/fastembed-provider.d.ts.map +1 -0
  369. package/dist/embedding/fastembed-provider.js +51 -0
  370. package/dist/embedding/fastembed-provider.js.map +1 -0
  371. package/dist/embedding/key-terms.d.ts +12 -0
  372. package/dist/embedding/key-terms.d.ts.map +1 -0
  373. package/dist/embedding/key-terms.js +234 -0
  374. package/dist/embedding/key-terms.js.map +1 -0
  375. package/dist/extraction/boilerplate.d.ts +15 -0
  376. package/dist/extraction/boilerplate.d.ts.map +1 -0
  377. package/dist/extraction/boilerplate.js +52 -0
  378. package/dist/extraction/boilerplate.js.map +1 -0
  379. package/dist/extraction/defuddle.d.ts.map +1 -1
  380. package/dist/extraction/defuddle.js +27 -23
  381. package/dist/extraction/defuddle.js.map +1 -1
  382. package/dist/extraction/extract.d.ts.map +1 -1
  383. package/dist/extraction/extract.js +76 -76
  384. package/dist/extraction/extract.js.map +1 -1
  385. package/dist/extraction/jsonld.js +50 -54
  386. package/dist/extraction/jsonld.js.map +1 -1
  387. package/dist/extraction/lang-hints.d.ts +2 -0
  388. package/dist/extraction/lang-hints.d.ts.map +1 -0
  389. package/dist/extraction/lang-hints.js +30 -0
  390. package/dist/extraction/lang-hints.js.map +1 -0
  391. package/dist/extraction/llm-fallback.d.ts +17 -0
  392. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  393. package/dist/extraction/llm-fallback.js +130 -0
  394. package/dist/extraction/llm-fallback.js.map +1 -0
  395. package/dist/extraction/markdown-sanitize.d.ts +2 -0
  396. package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
  397. package/dist/extraction/markdown-sanitize.js +151 -0
  398. package/dist/extraction/markdown-sanitize.js.map +1 -0
  399. package/dist/extraction/markdown.d.ts +11 -0
  400. package/dist/extraction/markdown.d.ts.map +1 -1
  401. package/dist/extraction/markdown.js +195 -91
  402. package/dist/extraction/markdown.js.map +1 -1
  403. package/dist/extraction/pipeline.d.ts +8 -0
  404. package/dist/extraction/pipeline.d.ts.map +1 -1
  405. package/dist/extraction/pipeline.js +57 -91
  406. package/dist/extraction/pipeline.js.map +1 -1
  407. package/dist/extraction/readability.d.ts +1 -1
  408. package/dist/extraction/readability.d.ts.map +1 -1
  409. package/dist/extraction/readability.js +28 -29
  410. package/dist/extraction/readability.js.map +1 -1
  411. package/dist/extraction/schema.d.ts +12 -0
  412. package/dist/extraction/schema.d.ts.map +1 -1
  413. package/dist/extraction/schema.js +135 -72
  414. package/dist/extraction/schema.js.map +1 -1
  415. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  416. package/dist/extraction/site-extractors/docs-generic.js +81 -91
  417. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  418. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  419. package/dist/extraction/site-extractors/github.js +87 -95
  420. package/dist/extraction/site-extractors/github.js.map +1 -1
  421. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  422. package/dist/extraction/site-extractors/mdn.js +46 -54
  423. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  424. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  425. package/dist/extraction/site-extractors/stackoverflow.js +71 -80
  426. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  427. package/dist/extraction/structured-data.d.ts +4 -0
  428. package/dist/extraction/structured-data.d.ts.map +1 -0
  429. package/dist/extraction/structured-data.js +173 -0
  430. package/dist/extraction/structured-data.js.map +1 -0
  431. package/dist/extraction/structured.d.ts +4 -0
  432. package/dist/extraction/structured.d.ts.map +1 -0
  433. package/dist/extraction/structured.js +163 -0
  434. package/dist/extraction/structured.js.map +1 -0
  435. package/dist/extraction/v1/classifier.d.ts +3 -0
  436. package/dist/extraction/v1/classifier.d.ts.map +1 -0
  437. package/dist/extraction/v1/classifier.js +110 -0
  438. package/dist/extraction/v1/classifier.js.map +1 -0
  439. package/dist/extraction/v1/extract-provider.d.ts +16 -0
  440. package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
  441. package/dist/extraction/v1/extract-provider.js +43 -0
  442. package/dist/extraction/v1/extract-provider.js.map +1 -0
  443. package/dist/extraction/v1/local-llm.d.ts +8 -0
  444. package/dist/extraction/v1/local-llm.d.ts.map +1 -0
  445. package/dist/extraction/v1/local-llm.js +34 -0
  446. package/dist/extraction/v1/local-llm.js.map +1 -0
  447. package/dist/extraction/v1/news.d.ts +3 -0
  448. package/dist/extraction/v1/news.d.ts.map +1 -0
  449. package/dist/extraction/v1/news.js +61 -0
  450. package/dist/extraction/v1/news.js.map +1 -0
  451. package/dist/extraction/v1/product.d.ts +3 -0
  452. package/dist/extraction/v1/product.d.ts.map +1 -0
  453. package/dist/extraction/v1/product.js +166 -0
  454. package/dist/extraction/v1/product.js.map +1 -0
  455. package/dist/extraction/v1/recipe.d.ts +3 -0
  456. package/dist/extraction/v1/recipe.d.ts.map +1 -0
  457. package/dist/extraction/v1/recipe.js +136 -0
  458. package/dist/extraction/v1/recipe.js.map +1 -0
  459. package/dist/extraction/v1/routed.d.ts +17 -0
  460. package/dist/extraction/v1/routed.d.ts.map +1 -0
  461. package/dist/extraction/v1/routed.js +68 -0
  462. package/dist/extraction/v1/routed.js.map +1 -0
  463. package/dist/extraction/v1/schemas/Article.d.ts +11 -0
  464. package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
  465. package/dist/extraction/v1/schemas/Article.js +23 -0
  466. package/dist/extraction/v1/schemas/Article.js.map +1 -0
  467. package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
  468. package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
  469. package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
  470. package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
  471. package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
  472. package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
  473. package/dist/extraction/v1/schemas/EventListing.js +122 -0
  474. package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
  475. package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
  476. package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
  477. package/dist/extraction/v1/schemas/Paper.js +156 -0
  478. package/dist/extraction/v1/schemas/Paper.js.map +1 -0
  479. package/dist/extraction/v1/schemas/Product.d.ts +17 -0
  480. package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
  481. package/dist/extraction/v1/schemas/Product.js +149 -0
  482. package/dist/extraction/v1/schemas/Product.js.map +1 -0
  483. package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
  484. package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
  485. package/dist/extraction/v1/schemas/Recipe.js +160 -0
  486. package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
  487. package/dist/extraction/v1/schemas/index.d.ts +13 -0
  488. package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
  489. package/dist/extraction/v1/schemas/index.js +44 -0
  490. package/dist/extraction/v1/schemas/index.js.map +1 -0
  491. package/dist/extraction/v1/site-extractors.d.ts +5 -0
  492. package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
  493. package/dist/extraction/v1/site-extractors.js +31 -0
  494. package/dist/extraction/v1/site-extractors.js.map +1 -0
  495. package/dist/fetch/action-executor.d.ts +28 -0
  496. package/dist/fetch/action-executor.d.ts.map +1 -0
  497. package/dist/fetch/action-executor.js +88 -0
  498. package/dist/fetch/action-executor.js.map +1 -0
  499. package/dist/fetch/auth.d.ts +2 -1
  500. package/dist/fetch/auth.d.ts.map +1 -1
  501. package/dist/fetch/auth.js +56 -26
  502. package/dist/fetch/auth.js.map +1 -1
  503. package/dist/fetch/browser-pool.d.ts +30 -11
  504. package/dist/fetch/browser-pool.d.ts.map +1 -1
  505. package/dist/fetch/browser-pool.js +303 -127
  506. package/dist/fetch/browser-pool.js.map +1 -1
  507. package/dist/fetch/browser-selector.d.ts +17 -0
  508. package/dist/fetch/browser-selector.d.ts.map +1 -0
  509. package/dist/fetch/browser-selector.js +72 -0
  510. package/dist/fetch/browser-selector.js.map +1 -0
  511. package/dist/fetch/browser-types.d.ts +3 -0
  512. package/dist/fetch/browser-types.d.ts.map +1 -0
  513. package/dist/fetch/browser-types.js +45 -0
  514. package/dist/fetch/browser-types.js.map +1 -0
  515. package/dist/fetch/cdp-client.d.ts +9 -0
  516. package/dist/fetch/cdp-client.d.ts.map +1 -0
  517. package/dist/fetch/cdp-client.js +89 -0
  518. package/dist/fetch/cdp-client.js.map +1 -0
  519. package/dist/fetch/content-check.js +39 -46
  520. package/dist/fetch/content-check.js.map +1 -1
  521. package/dist/fetch/error-describe.d.ts +7 -0
  522. package/dist/fetch/error-describe.d.ts.map +1 -0
  523. package/dist/fetch/error-describe.js +37 -0
  524. package/dist/fetch/error-describe.js.map +1 -0
  525. package/dist/fetch/http-client.d.ts +4 -0
  526. package/dist/fetch/http-client.d.ts.map +1 -1
  527. package/dist/fetch/http-client.js +147 -128
  528. package/dist/fetch/http-client.js.map +1 -1
  529. package/dist/fetch/lightpanda.d.ts +28 -0
  530. package/dist/fetch/lightpanda.d.ts.map +1 -0
  531. package/dist/fetch/lightpanda.js +174 -0
  532. package/dist/fetch/lightpanda.js.map +1 -0
  533. package/dist/fetch/playwright-tier.d.ts +19 -0
  534. package/dist/fetch/playwright-tier.d.ts.map +1 -0
  535. package/dist/fetch/playwright-tier.js +76 -0
  536. package/dist/fetch/playwright-tier.js.map +1 -0
  537. package/dist/fetch/router.d.ts +49 -3
  538. package/dist/fetch/router.d.ts.map +1 -1
  539. package/dist/fetch/router.js +187 -81
  540. package/dist/fetch/router.js.map +1 -1
  541. package/dist/index.js +102 -17
  542. package/dist/index.js.map +1 -1
  543. package/dist/instructions.d.ts +31 -0
  544. package/dist/instructions.d.ts.map +1 -0
  545. package/dist/instructions.js +245 -0
  546. package/dist/instructions.js.map +1 -0
  547. package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
  548. package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
  549. package/dist/integrations/cloud/llm/anthropic.js +41 -0
  550. package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
  551. package/dist/integrations/cloud/llm/cache.d.ts +5 -0
  552. package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
  553. package/dist/integrations/cloud/llm/cache.js +49 -0
  554. package/dist/integrations/cloud/llm/cache.js.map +1 -0
  555. package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
  556. package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
  557. package/dist/integrations/cloud/llm/gemini.js +37 -0
  558. package/dist/integrations/cloud/llm/gemini.js.map +1 -0
  559. package/dist/integrations/cloud/llm/groq.d.ts +3 -0
  560. package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
  561. package/dist/integrations/cloud/llm/groq.js +74 -0
  562. package/dist/integrations/cloud/llm/groq.js.map +1 -0
  563. package/dist/integrations/cloud/llm/hash.d.ts +3 -0
  564. package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
  565. package/dist/integrations/cloud/llm/hash.js +26 -0
  566. package/dist/integrations/cloud/llm/hash.js.map +1 -0
  567. package/dist/integrations/cloud/llm/model-select.d.ts +5 -0
  568. package/dist/integrations/cloud/llm/model-select.d.ts.map +1 -0
  569. package/dist/integrations/cloud/llm/model-select.js +32 -0
  570. package/dist/integrations/cloud/llm/model-select.js.map +1 -0
  571. package/dist/integrations/cloud/llm/openai.d.ts +3 -0
  572. package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
  573. package/dist/integrations/cloud/llm/openai.js +43 -0
  574. package/dist/integrations/cloud/llm/openai.js.map +1 -0
  575. package/dist/integrations/cloud/llm/run.d.ts +27 -0
  576. package/dist/integrations/cloud/llm/run.d.ts.map +1 -0
  577. package/dist/integrations/cloud/llm/run.js +99 -0
  578. package/dist/integrations/cloud/llm/run.js.map +1 -0
  579. package/dist/integrations/cloud/llm/select.d.ts +5 -0
  580. package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
  581. package/dist/integrations/cloud/llm/select.js +30 -0
  582. package/dist/integrations/cloud/llm/select.js.map +1 -0
  583. package/dist/integrations/cloud/llm/text-adapters.d.ts +19 -0
  584. package/dist/integrations/cloud/llm/text-adapters.d.ts.map +1 -0
  585. package/dist/integrations/cloud/llm/text-adapters.js +103 -0
  586. package/dist/integrations/cloud/llm/text-adapters.js.map +1 -0
  587. package/dist/integrations/cloud/llm/types.d.ts +24 -0
  588. package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
  589. package/dist/integrations/cloud/llm/types.js +1 -0
  590. package/dist/integrations/cloud/llm/types.js.map +1 -0
  591. package/dist/integrations/cloud/llm/validate.d.ts +6 -0
  592. package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
  593. package/dist/integrations/cloud/llm/validate.js +63 -0
  594. package/dist/integrations/cloud/llm/validate.js.map +1 -0
  595. package/dist/logger.d.ts +4 -1
  596. package/dist/logger.d.ts.map +1 -1
  597. package/dist/logger.js +71 -30
  598. package/dist/logger.js.map +1 -1
  599. package/dist/pdf-parse.d.js +1 -0
  600. package/dist/pdf-parse.d.js.map +1 -0
  601. package/dist/plugins/loader.d.ts +20 -0
  602. package/dist/plugins/loader.d.ts.map +1 -0
  603. package/dist/plugins/loader.js +157 -0
  604. package/dist/plugins/loader.js.map +1 -0
  605. package/dist/plugins/registry.d.ts +26 -0
  606. package/dist/plugins/registry.d.ts.map +1 -0
  607. package/dist/plugins/registry.js +71 -0
  608. package/dist/plugins/registry.js.map +1 -0
  609. package/dist/plugins/validate.d.ts +9 -0
  610. package/dist/plugins/validate.d.ts.map +1 -0
  611. package/dist/plugins/validate.js +79 -0
  612. package/dist/plugins/validate.js.map +1 -0
  613. package/dist/providers/embed-provider.d.ts +11 -0
  614. package/dist/providers/embed-provider.d.ts.map +1 -0
  615. package/dist/providers/embed-provider.js +24 -0
  616. package/dist/providers/embed-provider.js.map +1 -0
  617. package/dist/providers/extract-provider.d.ts +23 -0
  618. package/dist/providers/extract-provider.d.ts.map +1 -0
  619. package/dist/providers/extract-provider.js +25 -0
  620. package/dist/providers/extract-provider.js.map +1 -0
  621. package/dist/providers/rerank-provider.d.ts +17 -0
  622. package/dist/providers/rerank-provider.d.ts.map +1 -0
  623. package/dist/providers/rerank-provider.js +41 -0
  624. package/dist/providers/rerank-provider.js.map +1 -0
  625. package/dist/providers/search-provider.d.ts +25 -0
  626. package/dist/providers/search-provider.d.ts.map +1 -0
  627. package/dist/providers/search-provider.js +44 -0
  628. package/dist/providers/search-provider.js.map +1 -0
  629. package/dist/providers/vector-store.d.ts +27 -0
  630. package/dist/providers/vector-store.d.ts.map +1 -0
  631. package/dist/providers/vector-store.js +27 -0
  632. package/dist/providers/vector-store.js.map +1 -0
  633. package/dist/python-env.d.ts +9 -0
  634. package/dist/python-env.d.ts.map +1 -0
  635. package/dist/python-env.js +13 -0
  636. package/dist/python-env.js.map +1 -0
  637. package/dist/repl/commands/agent.d.ts +5 -0
  638. package/dist/repl/commands/agent.d.ts.map +1 -0
  639. package/dist/repl/commands/agent.js +62 -0
  640. package/dist/repl/commands/agent.js.map +1 -0
  641. package/dist/repl/commands/cache.d.ts +4 -0
  642. package/dist/repl/commands/cache.d.ts.map +1 -0
  643. package/dist/repl/commands/cache.js +43 -0
  644. package/dist/repl/commands/cache.js.map +1 -0
  645. package/dist/repl/commands/crawl.d.ts +7 -0
  646. package/dist/repl/commands/crawl.d.ts.map +1 -0
  647. package/dist/repl/commands/crawl.js +44 -0
  648. package/dist/repl/commands/crawl.js.map +1 -0
  649. package/dist/repl/commands/extract.d.ts +5 -0
  650. package/dist/repl/commands/extract.d.ts.map +1 -0
  651. package/dist/repl/commands/extract.js +47 -0
  652. package/dist/repl/commands/extract.js.map +1 -0
  653. package/dist/repl/commands/fetch.d.ts +5 -0
  654. package/dist/repl/commands/fetch.d.ts.map +1 -0
  655. package/dist/repl/commands/fetch.js +67 -0
  656. package/dist/repl/commands/fetch.js.map +1 -0
  657. package/dist/repl/commands/find-similar.d.ts +5 -0
  658. package/dist/repl/commands/find-similar.d.ts.map +1 -0
  659. package/dist/repl/commands/find-similar.js +74 -0
  660. package/dist/repl/commands/find-similar.js.map +1 -0
  661. package/dist/repl/commands/research.d.ts +5 -0
  662. package/dist/repl/commands/research.d.ts.map +1 -0
  663. package/dist/repl/commands/research.js +65 -0
  664. package/dist/repl/commands/research.js.map +1 -0
  665. package/dist/repl/commands/search.d.ts +5 -0
  666. package/dist/repl/commands/search.d.ts.map +1 -0
  667. package/dist/repl/commands/search.js +74 -0
  668. package/dist/repl/commands/search.js.map +1 -0
  669. package/dist/repl/commands/types.d.ts +9 -0
  670. package/dist/repl/commands/types.d.ts.map +1 -0
  671. package/dist/repl/commands/types.js +1 -0
  672. package/dist/repl/commands/types.js.map +1 -0
  673. package/dist/repl/formatters.d.ts +13 -0
  674. package/dist/repl/formatters.d.ts.map +1 -0
  675. package/dist/repl/formatters.js +283 -0
  676. package/dist/repl/formatters.js.map +1 -0
  677. package/dist/repl/parser.d.ts +9 -0
  678. package/dist/repl/parser.d.ts.map +1 -0
  679. package/dist/repl/parser.js +86 -0
  680. package/dist/repl/parser.js.map +1 -0
  681. package/dist/repl/shell.d.ts +8 -0
  682. package/dist/repl/shell.d.ts.map +1 -0
  683. package/dist/repl/shell.js +184 -0
  684. package/dist/repl/shell.js.map +1 -0
  685. package/dist/research/branch-exploration.d.ts +14 -0
  686. package/dist/research/branch-exploration.d.ts.map +1 -0
  687. package/dist/research/branch-exploration.js +100 -0
  688. package/dist/research/branch-exploration.js.map +1 -0
  689. package/dist/research/brief.d.ts +6 -0
  690. package/dist/research/brief.d.ts.map +1 -0
  691. package/dist/research/brief.js +246 -0
  692. package/dist/research/brief.js.map +1 -0
  693. package/dist/research/citation-graph.d.ts +9 -0
  694. package/dist/research/citation-graph.d.ts.map +1 -0
  695. package/dist/research/citation-graph.js +114 -0
  696. package/dist/research/citation-graph.js.map +1 -0
  697. package/dist/research/decompose.d.ts +14 -0
  698. package/dist/research/decompose.d.ts.map +1 -0
  699. package/dist/research/decompose.js +439 -0
  700. package/dist/research/decompose.js.map +1 -0
  701. package/dist/research/pipeline.d.ts +5 -0
  702. package/dist/research/pipeline.d.ts.map +1 -0
  703. package/dist/research/pipeline.js +269 -0
  704. package/dist/research/pipeline.js.map +1 -0
  705. package/dist/research/synthesis-local.d.ts +19 -0
  706. package/dist/research/synthesis-local.d.ts.map +1 -0
  707. package/dist/research/synthesis-local.js +62 -0
  708. package/dist/research/synthesis-local.js.map +1 -0
  709. package/dist/research/synthesize.d.ts +10 -0
  710. package/dist/research/synthesize.d.ts.map +1 -0
  711. package/dist/research/synthesize.js +137 -0
  712. package/dist/research/synthesize.js.map +1 -0
  713. package/dist/search/answer-synthesis.d.ts +33 -0
  714. package/dist/search/answer-synthesis.d.ts.map +1 -0
  715. package/dist/search/answer-synthesis.js +244 -0
  716. package/dist/search/answer-synthesis.js.map +1 -0
  717. package/dist/search/context-formatter.d.ts +3 -0
  718. package/dist/search/context-formatter.d.ts.map +1 -0
  719. package/dist/search/context-formatter.js +56 -0
  720. package/dist/search/context-formatter.js.map +1 -0
  721. package/dist/search/dedup.d.ts +1 -0
  722. package/dist/search/dedup.d.ts.map +1 -1
  723. package/dist/search/dedup.js +40 -32
  724. package/dist/search/dedup.js.map +1 -1
  725. package/dist/search/engines/arxiv.d.ts +7 -0
  726. package/dist/search/engines/arxiv.d.ts.map +1 -0
  727. package/dist/search/engines/arxiv.js +70 -0
  728. package/dist/search/engines/arxiv.js.map +1 -0
  729. package/dist/search/engines/bing-news.d.ts +7 -0
  730. package/dist/search/engines/bing-news.d.ts.map +1 -0
  731. package/dist/search/engines/bing-news.js +97 -0
  732. package/dist/search/engines/bing-news.js.map +1 -0
  733. package/dist/search/engines/bing.d.ts +1 -0
  734. package/dist/search/engines/bing.d.ts.map +1 -1
  735. package/dist/search/engines/bing.js +100 -44
  736. package/dist/search/engines/bing.js.map +1 -1
  737. package/dist/search/engines/devdocs.d.ts +6 -0
  738. package/dist/search/engines/devdocs.d.ts.map +1 -0
  739. package/dist/search/engines/devdocs.js +56 -0
  740. package/dist/search/engines/devdocs.js.map +1 -0
  741. package/dist/search/engines/duckduckgo.d.ts.map +1 -1
  742. package/dist/search/engines/duckduckgo.js +56 -44
  743. package/dist/search/engines/duckduckgo.js.map +1 -1
  744. package/dist/search/engines/github-code.d.ts +7 -0
  745. package/dist/search/engines/github-code.d.ts.map +1 -0
  746. package/dist/search/engines/github-code.js +55 -0
  747. package/dist/search/engines/github-code.js.map +1 -0
  748. package/dist/search/engines/hn-algolia.d.ts +7 -0
  749. package/dist/search/engines/hn-algolia.d.ts.map +1 -0
  750. package/dist/search/engines/hn-algolia.js +76 -0
  751. package/dist/search/engines/hn-algolia.js.map +1 -0
  752. package/dist/search/engines/lobsters.d.ts +7 -0
  753. package/dist/search/engines/lobsters.d.ts.map +1 -0
  754. package/dist/search/engines/lobsters.js +83 -0
  755. package/dist/search/engines/lobsters.js.map +1 -0
  756. package/dist/search/engines/mdn.d.ts +7 -0
  757. package/dist/search/engines/mdn.d.ts.map +1 -0
  758. package/dist/search/engines/mdn.js +48 -0
  759. package/dist/search/engines/mdn.js.map +1 -0
  760. package/dist/search/engines/semantic-scholar.d.ts +7 -0
  761. package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
  762. package/dist/search/engines/semantic-scholar.js +69 -0
  763. package/dist/search/engines/semantic-scholar.js.map +1 -0
  764. package/dist/search/engines/stackoverflow.d.ts +7 -0
  765. package/dist/search/engines/stackoverflow.d.ts.map +1 -0
  766. package/dist/search/engines/stackoverflow.js +73 -0
  767. package/dist/search/engines/stackoverflow.js.map +1 -0
  768. package/dist/search/engines/startpage.d.ts.map +1 -1
  769. package/dist/search/engines/startpage.js +65 -46
  770. package/dist/search/engines/startpage.js.map +1 -1
  771. package/dist/search/evidence.d.ts +25 -0
  772. package/dist/search/evidence.d.ts.map +1 -0
  773. package/dist/search/evidence.js +220 -0
  774. package/dist/search/evidence.js.map +1 -0
  775. package/dist/search/filters.d.ts.map +1 -1
  776. package/dist/search/filters.js +58 -54
  777. package/dist/search/filters.js.map +1 -1
  778. package/dist/search/find-similar/crawl-rank.d.ts +9 -0
  779. package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
  780. package/dist/search/find-similar/crawl-rank.js +272 -0
  781. package/dist/search/find-similar/crawl-rank.js.map +1 -0
  782. package/dist/search/find-similar/mode.d.ts +4 -0
  783. package/dist/search/find-similar/mode.d.ts.map +1 -0
  784. package/dist/search/find-similar/mode.js +12 -0
  785. package/dist/search/find-similar/mode.js.map +1 -0
  786. package/dist/search/find-similar.d.ts +5 -0
  787. package/dist/search/find-similar.d.ts.map +1 -0
  788. package/dist/search/find-similar.js +509 -0
  789. package/dist/search/find-similar.js.map +1 -0
  790. package/dist/search/highlights.d.ts +19 -0
  791. package/dist/search/highlights.d.ts.map +1 -0
  792. package/dist/search/highlights.js +167 -0
  793. package/dist/search/highlights.js.map +1 -0
  794. package/dist/search/language-filter.d.ts +29 -0
  795. package/dist/search/language-filter.d.ts.map +1 -0
  796. package/dist/search/language-filter.js +126 -0
  797. package/dist/search/language-filter.js.map +1 -0
  798. package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
  799. package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
  800. package/dist/search/legacy/searxng-orchestrator.js +501 -0
  801. package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
  802. package/dist/search/legacy/searxng-provider.d.ts +7 -0
  803. package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
  804. package/dist/search/legacy/searxng-provider.js +11 -0
  805. package/dist/search/legacy/searxng-provider.js.map +1 -0
  806. package/dist/search/multi-query.d.ts +25 -0
  807. package/dist/search/multi-query.d.ts.map +1 -0
  808. package/dist/search/multi-query.js +228 -0
  809. package/dist/search/multi-query.js.map +1 -0
  810. package/dist/search/query.js +32 -34
  811. package/dist/search/query.js.map +1 -1
  812. package/dist/search/rerank.d.ts +3 -1
  813. package/dist/search/rerank.d.ts.map +1 -1
  814. package/dist/search/rerank.js +44 -35
  815. package/dist/search/rerank.js.map +1 -1
  816. package/dist/search/reranker/authority-boost.d.ts +3 -0
  817. package/dist/search/reranker/authority-boost.d.ts.map +1 -0
  818. package/dist/search/reranker/authority-boost.js +179 -0
  819. package/dist/search/reranker/authority-boost.js.map +1 -0
  820. package/dist/search/reranker/consensus-boost.d.ts +3 -0
  821. package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
  822. package/dist/search/reranker/consensus-boost.js +27 -0
  823. package/dist/search/reranker/consensus-boost.js.map +1 -0
  824. package/dist/search/reranker/recency-boost.d.ts +3 -0
  825. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  826. package/dist/search/reranker/recency-boost.js +13 -0
  827. package/dist/search/reranker/recency-boost.js.map +1 -0
  828. package/dist/search/reranker/recency.d.ts +3 -0
  829. package/dist/search/reranker/recency.d.ts.map +1 -0
  830. package/dist/search/reranker/recency.js +23 -0
  831. package/dist/search/reranker/recency.js.map +1 -0
  832. package/dist/search/reranker/transformers-rerank-provider.d.ts +13 -0
  833. package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
  834. package/dist/search/reranker/transformers-rerank-provider.js +94 -0
  835. package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
  836. package/dist/search/rrf.d.ts +17 -0
  837. package/dist/search/rrf.d.ts.map +1 -0
  838. package/dist/search/rrf.js +39 -0
  839. package/dist/search/rrf.js.map +1 -0
  840. package/dist/search/sampling.d.ts +25 -0
  841. package/dist/search/sampling.d.ts.map +1 -0
  842. package/dist/search/sampling.js +52 -0
  843. package/dist/search/sampling.js.map +1 -0
  844. package/dist/search/searxng.d.ts.map +1 -1
  845. package/dist/search/searxng.js +69 -79
  846. package/dist/search/searxng.js.map +1 -1
  847. package/dist/search/tokens.d.ts +3 -0
  848. package/dist/search/tokens.d.ts.map +1 -0
  849. package/dist/search/tokens.js +39 -0
  850. package/dist/search/tokens.js.map +1 -0
  851. package/dist/search/truncate.d.ts +6 -0
  852. package/dist/search/truncate.d.ts.map +1 -0
  853. package/dist/search/truncate.js +26 -0
  854. package/dist/search/truncate.js.map +1 -0
  855. package/dist/search/url-unwrap.d.ts +3 -0
  856. package/dist/search/url-unwrap.d.ts.map +1 -0
  857. package/dist/search/url-unwrap.js +43 -0
  858. package/dist/search/url-unwrap.js.map +1 -0
  859. package/dist/search/v1/context-rank.d.ts +13 -0
  860. package/dist/search/v1/context-rank.d.ts.map +1 -0
  861. package/dist/search/v1/context-rank.js +74 -0
  862. package/dist/search/v1/context-rank.js.map +1 -0
  863. package/dist/search/v1/engine-base.d.ts +27 -0
  864. package/dist/search/v1/engine-base.d.ts.map +1 -0
  865. package/dist/search/v1/engine-base.js +110 -0
  866. package/dist/search/v1/engine-base.js.map +1 -0
  867. package/dist/search/v1/intent-router.d.ts +22 -0
  868. package/dist/search/v1/intent-router.d.ts.map +1 -0
  869. package/dist/search/v1/intent-router.js +138 -0
  870. package/dist/search/v1/intent-router.js.map +1 -0
  871. package/dist/search/v1/orchestrator.d.ts +24 -0
  872. package/dist/search/v1/orchestrator.d.ts.map +1 -0
  873. package/dist/search/v1/orchestrator.js +163 -0
  874. package/dist/search/v1/orchestrator.js.map +1 -0
  875. package/dist/search/v1/recency-boost.d.ts +9 -0
  876. package/dist/search/v1/recency-boost.d.ts.map +1 -0
  877. package/dist/search/v1/recency-boost.js +37 -0
  878. package/dist/search/v1/recency-boost.js.map +1 -0
  879. package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
  880. package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
  881. package/dist/search/v1/recent-cache-dedup.js +85 -0
  882. package/dist/search/v1/recent-cache-dedup.js.map +1 -0
  883. package/dist/search/v1/rss/feed-config.d.ts +21 -0
  884. package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
  885. package/dist/search/v1/rss/feed-config.js +90 -0
  886. package/dist/search/v1/rss/feed-config.js.map +1 -0
  887. package/dist/search/v1/rss/feed-parser.d.ts +14 -0
  888. package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
  889. package/dist/search/v1/rss/feed-parser.js +104 -0
  890. package/dist/search/v1/rss/feed-parser.js.map +1 -0
  891. package/dist/search/v1/rss/feed-poller.d.ts +22 -0
  892. package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
  893. package/dist/search/v1/rss/feed-poller.js +102 -0
  894. package/dist/search/v1/rss/feed-poller.js.map +1 -0
  895. package/dist/search/v1/rss/feed-store.d.ts +30 -0
  896. package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
  897. package/dist/search/v1/rss/feed-store.js +134 -0
  898. package/dist/search/v1/rss/feed-store.js.map +1 -0
  899. package/dist/search/v1/rss/rss-engine.d.ts +6 -0
  900. package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
  901. package/dist/search/v1/rss/rss-engine.js +28 -0
  902. package/dist/search/v1/rss/rss-engine.js.map +1 -0
  903. package/dist/search/v1/v1-provider.d.ts +7 -0
  904. package/dist/search/v1/v1-provider.d.ts.map +1 -0
  905. package/dist/search/v1/v1-provider.js +68 -0
  906. package/dist/search/v1/v1-provider.js.map +1 -0
  907. package/dist/search/v1/verticals/code.d.ts +4 -0
  908. package/dist/search/v1/verticals/code.d.ts.map +1 -0
  909. package/dist/search/v1/verticals/code.js +20 -0
  910. package/dist/search/v1/verticals/code.js.map +1 -0
  911. package/dist/search/v1/verticals/docs.d.ts +4 -0
  912. package/dist/search/v1/verticals/docs.d.ts.map +1 -0
  913. package/dist/search/v1/verticals/docs.js +20 -0
  914. package/dist/search/v1/verticals/docs.js.map +1 -0
  915. package/dist/search/v1/verticals/general.d.ts +4 -0
  916. package/dist/search/v1/verticals/general.d.ts.map +1 -0
  917. package/dist/search/v1/verticals/general.js +22 -0
  918. package/dist/search/v1/verticals/general.js.map +1 -0
  919. package/dist/search/v1/verticals/news.d.ts +10 -0
  920. package/dist/search/v1/verticals/news.d.ts.map +1 -0
  921. package/dist/search/v1/verticals/news.js +52 -0
  922. package/dist/search/v1/verticals/news.js.map +1 -0
  923. package/dist/search/v1/verticals/papers.d.ts +4 -0
  924. package/dist/search/v1/verticals/papers.d.ts.map +1 -0
  925. package/dist/search/v1/verticals/papers.js +23 -0
  926. package/dist/search/v1/verticals/papers.js.map +1 -0
  927. package/dist/search/validator.js +31 -31
  928. package/dist/search/validator.js.map +1 -1
  929. package/dist/searxng/bootstrap.d.ts +30 -0
  930. package/dist/searxng/bootstrap.d.ts.map +1 -1
  931. package/dist/searxng/bootstrap.js +223 -85
  932. package/dist/searxng/bootstrap.js.map +1 -1
  933. package/dist/searxng/docker.d.ts.map +1 -1
  934. package/dist/searxng/docker.js +69 -60
  935. package/dist/searxng/docker.js.map +1 -1
  936. package/dist/searxng/process.d.ts +13 -1
  937. package/dist/searxng/process.d.ts.map +1 -1
  938. package/dist/searxng/process.js +231 -164
  939. package/dist/searxng/process.js.map +1 -1
  940. package/dist/server/backend-status.d.ts +13 -0
  941. package/dist/server/backend-status.d.ts.map +1 -0
  942. package/dist/server/backend-status.js +40 -0
  943. package/dist/server/backend-status.js.map +1 -0
  944. package/dist/server/tool-schemas.d.ts +549 -0
  945. package/dist/server/tool-schemas.d.ts.map +1 -0
  946. package/dist/server/tool-schemas.js +464 -0
  947. package/dist/server/tool-schemas.js.map +1 -0
  948. package/dist/server/warmup-on-start.d.ts +9 -0
  949. package/dist/server/warmup-on-start.d.ts.map +1 -0
  950. package/dist/server/warmup-on-start.js +55 -0
  951. package/dist/server/warmup-on-start.js.map +1 -0
  952. package/dist/server.d.ts +17 -0
  953. package/dist/server.d.ts.map +1 -1
  954. package/dist/server.js +454 -297
  955. package/dist/server.js.map +1 -1
  956. package/dist/tools/agent.d.ts +5 -0
  957. package/dist/tools/agent.d.ts.map +1 -0
  958. package/dist/tools/agent.js +128 -0
  959. package/dist/tools/agent.js.map +1 -0
  960. package/dist/tools/cache.d.ts +2 -1
  961. package/dist/tools/cache.d.ts.map +1 -1
  962. package/dist/tools/cache.js +177 -44
  963. package/dist/tools/cache.js.map +1 -1
  964. package/dist/tools/crawl.d.ts.map +1 -1
  965. package/dist/tools/crawl.js +171 -88
  966. package/dist/tools/crawl.js.map +1 -1
  967. package/dist/tools/extract.d.ts +2 -2
  968. package/dist/tools/extract.d.ts.map +1 -1
  969. package/dist/tools/extract.js +175 -59
  970. package/dist/tools/extract.js.map +1 -1
  971. package/dist/tools/fetch.d.ts +2 -2
  972. package/dist/tools/fetch.d.ts.map +1 -1
  973. package/dist/tools/fetch.js +174 -68
  974. package/dist/tools/fetch.js.map +1 -1
  975. package/dist/tools/find-similar.d.ts +5 -0
  976. package/dist/tools/find-similar.d.ts.map +1 -0
  977. package/dist/tools/find-similar.js +127 -0
  978. package/dist/tools/find-similar.js.map +1 -0
  979. package/dist/tools/research.d.ts +5 -0
  980. package/dist/tools/research.d.ts.map +1 -0
  981. package/dist/tools/research.js +107 -0
  982. package/dist/tools/research.js.map +1 -0
  983. package/dist/tools/search.d.ts +10 -2
  984. package/dist/tools/search.d.ts.map +1 -1
  985. package/dist/tools/search.js +13 -158
  986. package/dist/tools/search.js.map +1 -1
  987. package/dist/types.d.ts +350 -7
  988. package/dist/types.d.ts.map +1 -1
  989. package/dist/types.js +6 -1
  990. package/dist/types.js.map +1 -1
  991. package/dist/util/mode.d.ts +4 -0
  992. package/dist/util/mode.d.ts.map +1 -0
  993. package/dist/util/mode.js +34 -0
  994. package/dist/util/mode.js.map +1 -0
  995. package/package.json +78 -8
  996. package/dist/extraction/trafilatura.d.ts +0 -6
  997. package/dist/extraction/trafilatura.d.ts.map +0 -1
  998. package/dist/extraction/trafilatura.js +0 -105
  999. package/dist/extraction/trafilatura.js.map +0 -1
  1000. package/dist/search/flashrank.d.ts +0 -12
  1001. package/dist/search/flashrank.d.ts.map +0 -1
  1002. package/dist/search/flashrank.js +0 -63
  1003. package/dist/search/flashrank.js.map +0 -1
@@ -1,63 +1,64 @@
1
- export class RobotsParser {
2
- rules = [];
3
- crawlDelay = null;
4
- constructor(robotsTxt) {
5
- this.parse(robotsTxt);
6
- }
7
- parse(text) {
8
- const lines = text.split('\n');
9
- let inWildcardAgent = false;
10
- for (const rawLine of lines) {
11
- const line = rawLine.trim();
12
- if (line.match(/^user-agent:\s*\*/i)) {
13
- inWildcardAgent = true;
14
- continue;
15
- }
16
- if (line.match(/^user-agent:/i) && !line.match(/^user-agent:\s*\*/i)) {
17
- inWildcardAgent = false;
18
- continue;
19
- }
20
- if (!inWildcardAgent)
21
- continue;
22
- const disallowMatch = line.match(/^disallow:\s*(.*)/i);
23
- if (disallowMatch) {
24
- const path = disallowMatch[1].trim();
25
- if (path) {
26
- this.rules.push({ type: 'disallow', path });
27
- }
28
- continue;
29
- }
30
- const allowMatch = line.match(/^allow:\s*(.*)/i);
31
- if (allowMatch) {
32
- const path = allowMatch[1].trim();
33
- if (path) {
34
- this.rules.push({ type: 'allow', path });
35
- }
36
- continue;
37
- }
38
- const delayMatch = line.match(/^crawl-delay:\s*(\d+(?:\.\d+)?)/i);
39
- if (delayMatch) {
40
- this.crawlDelay = parseFloat(delayMatch[1]);
41
- }
1
+ class RobotsParser {
2
+ rules = [];
3
+ crawlDelay = null;
4
+ constructor(robotsTxt) {
5
+ this.parse(robotsTxt);
6
+ }
7
+ parse(text) {
8
+ const lines = text.split("\n");
9
+ let inWildcardAgent = false;
10
+ for (const rawLine of lines) {
11
+ const line = rawLine.trim();
12
+ if (line.match(/^user-agent:\s*\*/i)) {
13
+ inWildcardAgent = true;
14
+ continue;
15
+ }
16
+ if (line.match(/^user-agent:/i) && !line.match(/^user-agent:\s*\*/i)) {
17
+ inWildcardAgent = false;
18
+ continue;
19
+ }
20
+ if (!inWildcardAgent) continue;
21
+ const disallowMatch = line.match(/^disallow:\s*(.*)/i);
22
+ if (disallowMatch) {
23
+ const path = disallowMatch[1].trim();
24
+ if (path) {
25
+ this.rules.push({ type: "disallow", path });
42
26
  }
43
- }
44
- isAllowed(path) {
45
- let bestMatch = null;
46
- let bestLength = -1;
47
- for (const rule of this.rules) {
48
- if (path.startsWith(rule.path)) {
49
- if (rule.path.length > bestLength || (rule.path.length === bestLength && rule.type === 'allow')) {
50
- bestMatch = rule;
51
- bestLength = rule.path.length;
52
- }
53
- }
27
+ continue;
28
+ }
29
+ const allowMatch = line.match(/^allow:\s*(.*)/i);
30
+ if (allowMatch) {
31
+ const path = allowMatch[1].trim();
32
+ if (path) {
33
+ this.rules.push({ type: "allow", path });
54
34
  }
55
- if (!bestMatch)
56
- return true;
57
- return bestMatch.type === 'allow';
35
+ continue;
36
+ }
37
+ const delayMatch = line.match(/^crawl-delay:\s*(\d+(?:\.\d+)?)/i);
38
+ if (delayMatch) {
39
+ this.crawlDelay = parseFloat(delayMatch[1]);
40
+ }
58
41
  }
59
- getCrawlDelay() {
60
- return this.crawlDelay;
42
+ }
43
+ isAllowed(path) {
44
+ let bestMatch = null;
45
+ let bestLength = -1;
46
+ for (const rule of this.rules) {
47
+ if (path.startsWith(rule.path)) {
48
+ if (rule.path.length > bestLength || rule.path.length === bestLength && rule.type === "allow") {
49
+ bestMatch = rule;
50
+ bestLength = rule.path.length;
51
+ }
52
+ }
61
53
  }
54
+ if (!bestMatch) return true;
55
+ return bestMatch.type === "allow";
56
+ }
57
+ getCrawlDelay() {
58
+ return this.crawlDelay;
59
+ }
62
60
  }
61
+ export {
62
+ RobotsParser
63
+ };
63
64
  //# sourceMappingURL=robots.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"robots.js","sourceRoot":"","sources":["../../src/crawl/robots.ts"],"names":[],"mappings":"AAKA,MAAM,OAAO,YAAY;IACf,KAAK,GAAiB,EAAE,CAAC;IACzB,UAAU,GAAkB,IAAI,CAAC;IAEzC,YAAY,SAAiB;QAC3B,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACxB,CAAC;IAEO,KAAK,CAAC,IAAY;QACxB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,eAAe,GAAG,KAAK,CAAC;QAE5B,KAAK,MAAM,OAAO,IAAI,KAAK,EAAE,CAAC;YAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;YAE5B,IAAI,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,EAAE,CAAC;gBACrC,eAAe,GAAG,IAAI,CAAC;gBACvB,SAAS;YACX,CAAC;YAED,IAAI,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,EAAE,CAAC;gBACrE,eAAe,GAAG,KAAK,CAAC;gBACxB,SAAS;YACX,CAAC;YAED,IAAI,CAAC,eAAe;gBAAE,SAAS;YAE/B,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;YACvD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,IAAI,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBACrC,IAAI,IAAI,EAAE,CAAC;oBACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC9C,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YACjD,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAClC,IAAI,IAAI,EAAE,CAAC;oBACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC3C,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;YAClE,IAAI,UAAU,EAAE,CAAC;gBACf,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,CAAC;QACH,CAAC;IACH,CAAC;IAED,SAAS,CAAC,IAAY;QACpB,IAAI,SAAS,GAAsB,IAAI,CAAC;QACxC,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC;QAEpB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC9B,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/B,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,UAAU,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,KAAK,UAAU,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,CAAC,EAAE,CAAC;oBAChG,SAAS,GAAG,IAAI,CAAC;oBACjB,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;gBAChC,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC;QAC5B,OAAO,SAAS,CAAC,IAAI,KAAK,OAAO,CAAC;I
ACpC,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,UAAU,CAAC;IACzB,CAAC;CACF"}
1
+ {"version":3,"sources":["../../src/crawl/robots.ts"],"sourcesContent":["interface RobotsRule {\n type: 'allow' | 'disallow';\n path: string;\n}\n\nexport class RobotsParser {\n private rules: RobotsRule[] = [];\n private crawlDelay: number | null = null;\n\n constructor(robotsTxt: string) {\n this.parse(robotsTxt);\n }\n\n private parse(text: string): void {\n const lines = text.split('\\n');\n let inWildcardAgent = false;\n\n for (const rawLine of lines) {\n const line = rawLine.trim();\n\n if (line.match(/^user-agent:\\s*\\*/i)) {\n inWildcardAgent = true;\n continue;\n }\n\n if (line.match(/^user-agent:/i) && !line.match(/^user-agent:\\s*\\*/i)) {\n inWildcardAgent = false;\n continue;\n }\n\n if (!inWildcardAgent) continue;\n\n const disallowMatch = line.match(/^disallow:\\s*(.*)/i);\n if (disallowMatch) {\n const path = disallowMatch[1].trim();\n if (path) {\n this.rules.push({ type: 'disallow', path });\n }\n continue;\n }\n\n const allowMatch = line.match(/^allow:\\s*(.*)/i);\n if (allowMatch) {\n const path = allowMatch[1].trim();\n if (path) {\n this.rules.push({ type: 'allow', path });\n }\n continue;\n }\n\n const delayMatch = line.match(/^crawl-delay:\\s*(\\d+(?:\\.\\d+)?)/i);\n if (delayMatch) {\n this.crawlDelay = parseFloat(delayMatch[1]);\n }\n }\n }\n\n isAllowed(path: string): boolean {\n let bestMatch: RobotsRule | null = null;\n let bestLength = -1;\n\n for (const rule of this.rules) {\n if (path.startsWith(rule.path)) {\n if (rule.path.length > bestLength || (rule.path.length === bestLength && rule.type === 'allow')) {\n bestMatch = rule;\n bestLength = rule.path.length;\n }\n }\n }\n\n if (!bestMatch) return true;\n return bestMatch.type === 'allow';\n }\n\n getCrawlDelay(): number | null {\n return this.crawlDelay;\n 
}\n}\n"],"mappings":"AAKO,MAAM,aAAa;AAAA,EAChB,QAAsB,CAAC;AAAA,EACvB,aAA4B;AAAA,EAEpC,YAAY,WAAmB;AAC7B,SAAK,MAAM,SAAS;AAAA,EACtB;AAAA,EAEQ,MAAM,MAAoB;AAChC,UAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,QAAI,kBAAkB;AAEtB,eAAW,WAAW,OAAO;AAC3B,YAAM,OAAO,QAAQ,KAAK;AAE1B,UAAI,KAAK,MAAM,oBAAoB,GAAG;AACpC,0BAAkB;AAClB;AAAA,MACF;AAEA,UAAI,KAAK,MAAM,eAAe,KAAK,CAAC,KAAK,MAAM,oBAAoB,GAAG;AACpE,0BAAkB;AAClB;AAAA,MACF;AAEA,UAAI,CAAC,gBAAiB;AAEtB,YAAM,gBAAgB,KAAK,MAAM,oBAAoB;AACrD,UAAI,eAAe;AACjB,cAAM,OAAO,cAAc,CAAC,EAAE,KAAK;AACnC,YAAI,MAAM;AACR,eAAK,MAAM,KAAK,EAAE,MAAM,YAAY,KAAK,CAAC;AAAA,QAC5C;AACA;AAAA,MACF;AAEA,YAAM,aAAa,KAAK,MAAM,iBAAiB;AAC/C,UAAI,YAAY;AACd,cAAM,OAAO,WAAW,CAAC,EAAE,KAAK;AAChC,YAAI,MAAM;AACR,eAAK,MAAM,KAAK,EAAE,MAAM,SAAS,KAAK,CAAC;AAAA,QACzC;AACA;AAAA,MACF;AAEA,YAAM,aAAa,KAAK,MAAM,kCAAkC;AAChE,UAAI,YAAY;AACd,aAAK,aAAa,WAAW,WAAW,CAAC,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAAA,EAEA,UAAU,MAAuB;AAC/B,QAAI,YAA+B;AACnC,QAAI,aAAa;AAEjB,eAAW,QAAQ,KAAK,OAAO;AAC7B,UAAI,KAAK,WAAW,KAAK,IAAI,GAAG;AAC9B,YAAI,KAAK,KAAK,SAAS,cAAe,KAAK,KAAK,WAAW,cAAc,KAAK,SAAS,SAAU;AAC/F,sBAAY;AACZ,uBAAa,KAAK,KAAK;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAEA,QAAI,CAAC,UAAW,QAAO;AACvB,WAAO,UAAU,SAAS;AAAA,EAC5B;AAAA,EAEA,gBAA+B;AAC7B,WAAO,KAAK;AAAA,EACd;AACF;","names":[]}
@@ -0,0 +1,12 @@
1
+ import type { RawFetchResult } from '../types.js';
2
+ export type RawFetchFn = (url: string) => Promise<RawFetchResult>;
3
+ /**
4
+ * Probe an origin for a usable sitemap. Returns the discovered URL list if
5
+ * at least SITEMAP_MIN_URLS were found; otherwise null so the caller can
6
+ * fall back to a traversal strategy.
7
+ *
8
+ * Order: robots.txt → /sitemap.xml → /sitemap_index.xml. .gz is skipped
9
+ * (decompression deferred).
10
+ */
11
+ export declare function probeSitemap(origin: string, rawFetch: RawFetchFn): Promise<string[] | null>;
12
+ //# sourceMappingURL=sitemap-first.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sitemap-first.d.ts","sourceRoot":"","sources":["../../src/crawl/sitemap-first.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAMlD,MAAM,MAAM,UAAU,GAAG,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,cAAc,CAAC,CAAC;AAElE;;;;;;;GAOG;AACH,wBAAsB,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,CAoBjG"}
@@ -0,0 +1,47 @@
1
+ import { parseSitemap, parseSitemapIndex, extractSitemapUrlFromRobots } from "./sitemap.js";
2
+ const PROBE_PATHS = ["/sitemap.xml", "/sitemap_index.xml", "/sitemap.xml.gz"];
3
+ const SITEMAP_MIN_URLS = 5;
4
+ const MAX_INDEX_CHILDREN = 5;
5
+ async function probeSitemap(origin, rawFetch) {
6
+ try {
7
+ const robots = await rawFetch(`${origin}/robots.txt`);
8
+ if (robots.html) {
9
+ const sitemapUrls = extractSitemapUrlFromRobots(robots.html);
10
+ for (const smUrl of sitemapUrls) {
11
+ const urls = await fetchAndParseSitemap(smUrl, rawFetch);
12
+ if (urls && urls.length >= SITEMAP_MIN_URLS) return urls;
13
+ }
14
+ }
15
+ } catch {
16
+ }
17
+ for (const path of PROBE_PATHS) {
18
+ if (path.endsWith(".gz")) continue;
19
+ const urls = await fetchAndParseSitemap(`${origin}${path}`, rawFetch);
20
+ if (urls && urls.length >= SITEMAP_MIN_URLS) return urls;
21
+ }
22
+ return null;
23
+ }
24
+ async function fetchAndParseSitemap(url, rawFetch) {
25
+ try {
26
+ const result = await rawFetch(url);
27
+ if (!result.html) return null;
28
+ if (result.statusCode && result.statusCode >= 400) return null;
29
+ if (result.html.includes("<sitemapindex")) {
30
+ const children = parseSitemapIndex(result.html);
31
+ const all = [];
32
+ for (const child of children.slice(0, MAX_INDEX_CHILDREN)) {
33
+ const grand = await fetchAndParseSitemap(child, rawFetch);
34
+ if (grand) all.push(...grand);
35
+ }
36
+ return all.length > 0 ? all : null;
37
+ }
38
+ const urls = parseSitemap(result.html);
39
+ return urls.length > 0 ? urls : null;
40
+ } catch {
41
+ return null;
42
+ }
43
+ }
44
+ export {
45
+ probeSitemap
46
+ };
47
+ //# sourceMappingURL=sitemap-first.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/crawl/sitemap-first.ts"],"sourcesContent":["import { parseSitemap, parseSitemapIndex, extractSitemapUrlFromRobots } from './sitemap.js';\nimport type { RawFetchResult } from '../types.js';\n\nconst PROBE_PATHS = ['/sitemap.xml', '/sitemap_index.xml', '/sitemap.xml.gz'];\nconst SITEMAP_MIN_URLS = 5;\nconst MAX_INDEX_CHILDREN = 5;\n\nexport type RawFetchFn = (url: string) => Promise<RawFetchResult>;\n\n/**\n * Probe an origin for a usable sitemap. Returns the discovered URL list if\n * at least SITEMAP_MIN_URLS were found; otherwise null so the caller can\n * fall back to a traversal strategy.\n *\n * Order: robots.txt → /sitemap.xml → /sitemap_index.xml. .gz is skipped\n * (decompression deferred).\n */\nexport async function probeSitemap(origin: string, rawFetch: RawFetchFn): Promise<string[] | null> {\n try {\n const robots = await rawFetch(`${origin}/robots.txt`);\n if (robots.html) {\n const sitemapUrls = extractSitemapUrlFromRobots(robots.html);\n for (const smUrl of sitemapUrls) {\n const urls = await fetchAndParseSitemap(smUrl, rawFetch);\n if (urls && urls.length >= SITEMAP_MIN_URLS) return urls;\n }\n }\n } catch {\n // ignore; fall through to direct probe\n }\n\n for (const path of PROBE_PATHS) {\n if (path.endsWith('.gz')) continue;\n const urls = await fetchAndParseSitemap(`${origin}${path}`, rawFetch);\n if (urls && urls.length >= SITEMAP_MIN_URLS) return urls;\n }\n return null;\n}\n\nasync function fetchAndParseSitemap(url: string, rawFetch: RawFetchFn): Promise<string[] | null> {\n try {\n const result = await rawFetch(url);\n if (!result.html) return null;\n if (result.statusCode && result.statusCode >= 400) return null;\n\n if (result.html.includes('<sitemapindex')) {\n const children = parseSitemapIndex(result.html);\n const all: string[] = [];\n for (const child of children.slice(0, MAX_INDEX_CHILDREN)) {\n const grand = await fetchAndParseSitemap(child, rawFetch);\n if (grand) all.push(...grand);\n }\n return 
all.length > 0 ? all : null;\n }\n\n const urls = parseSitemap(result.html);\n return urls.length > 0 ? urls : null;\n } catch {\n return null;\n }\n}\n"],"mappings":"AAAA,SAAS,cAAc,mBAAmB,mCAAmC;AAG7E,MAAM,cAAc,CAAC,gBAAgB,sBAAsB,iBAAiB;AAC5E,MAAM,mBAAmB;AACzB,MAAM,qBAAqB;AAY3B,eAAsB,aAAa,QAAgB,UAAgD;AACjG,MAAI;AACF,UAAM,SAAS,MAAM,SAAS,GAAG,MAAM,aAAa;AACpD,QAAI,OAAO,MAAM;AACf,YAAM,cAAc,4BAA4B,OAAO,IAAI;AAC3D,iBAAW,SAAS,aAAa;AAC/B,cAAM,OAAO,MAAM,qBAAqB,OAAO,QAAQ;AACvD,YAAI,QAAQ,KAAK,UAAU,iBAAkB,QAAO;AAAA,MACtD;AAAA,IACF;AAAA,EACF,QAAQ;AAAA,EAER;AAEA,aAAW,QAAQ,aAAa;AAC9B,QAAI,KAAK,SAAS,KAAK,EAAG;AAC1B,UAAM,OAAO,MAAM,qBAAqB,GAAG,MAAM,GAAG,IAAI,IAAI,QAAQ;AACpE,QAAI,QAAQ,KAAK,UAAU,iBAAkB,QAAO;AAAA,EACtD;AACA,SAAO;AACT;AAEA,eAAe,qBAAqB,KAAa,UAAgD;AAC/F,MAAI;AACF,UAAM,SAAS,MAAM,SAAS,GAAG;AACjC,QAAI,CAAC,OAAO,KAAM,QAAO;AACzB,QAAI,OAAO,cAAc,OAAO,cAAc,IAAK,QAAO;AAE1D,QAAI,OAAO,KAAK,SAAS,eAAe,GAAG;AACzC,YAAM,WAAW,kBAAkB,OAAO,IAAI;AAC9C,YAAM,MAAgB,CAAC;AACvB,iBAAW,SAAS,SAAS,MAAM,GAAG,kBAAkB,GAAG;AACzD,cAAM,QAAQ,MAAM,qBAAqB,OAAO,QAAQ;AACxD,YAAI,MAAO,KAAI,KAAK,GAAG,KAAK;AAAA,MAC9B;AACA,aAAO,IAAI,SAAS,IAAI,MAAM;AAAA,IAChC;AAEA,UAAM,OAAO,aAAa,OAAO,IAAI;AACrC,WAAO,KAAK,SAAS,IAAI,OAAO;AAAA,EAClC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;","names":[]}
@@ -1,38 +1,39 @@
1
- export function parseSitemap(xml) {
2
- // A sitemapindex document should be parsed with parseSitemapIndex, not here
3
- if (xml.includes('<sitemapindex'))
4
- return [];
5
- if (!xml.includes('<urlset') && !xml.includes('<loc>'))
6
- return [];
7
- const urls = [];
8
- const locMatches = xml.matchAll(/<loc>\s*([^<]+?)\s*<\/loc>/g);
9
- for (const match of locMatches) {
10
- const url = match[1].trim();
11
- if (url) {
12
- urls.push(url);
13
- }
1
+ function parseSitemap(xml) {
2
+ if (xml.includes("<sitemapindex")) return [];
3
+ if (!xml.includes("<urlset") && !xml.includes("<loc>")) return [];
4
+ const urls = [];
5
+ const locMatches = xml.matchAll(/<loc>\s*([^<]+?)\s*<\/loc>/g);
6
+ for (const match of locMatches) {
7
+ const url = match[1].trim();
8
+ if (url) {
9
+ urls.push(url);
14
10
  }
15
- return urls;
11
+ }
12
+ return urls;
16
13
  }
17
- export function parseSitemapIndex(xml) {
18
- if (!xml.includes('<sitemapindex'))
19
- return [];
20
- const urls = [];
21
- const locMatches = xml.matchAll(/<loc>\s*([^<]+?)\s*<\/loc>/g);
22
- for (const match of locMatches) {
23
- urls.push(match[1].trim());
24
- }
25
- return urls;
14
+ function parseSitemapIndex(xml) {
15
+ if (!xml.includes("<sitemapindex")) return [];
16
+ const urls = [];
17
+ const locMatches = xml.matchAll(/<loc>\s*([^<]+?)\s*<\/loc>/g);
18
+ for (const match of locMatches) {
19
+ urls.push(match[1].trim());
20
+ }
21
+ return urls;
26
22
  }
27
- export function extractSitemapUrlFromRobots(robotsTxt) {
28
- const urls = [];
29
- const lines = robotsTxt.split('\n');
30
- for (const line of lines) {
31
- const match = line.match(/^sitemap:\s*(.+)/i);
32
- if (match) {
33
- urls.push(match[1].trim());
34
- }
23
+ function extractSitemapUrlFromRobots(robotsTxt) {
24
+ const urls = [];
25
+ const lines = robotsTxt.split("\n");
26
+ for (const line of lines) {
27
+ const match = line.match(/^sitemap:\s*(.+)/i);
28
+ if (match) {
29
+ urls.push(match[1].trim());
35
30
  }
36
- return urls;
31
+ }
32
+ return urls;
37
33
  }
34
+ export {
35
+ extractSitemapUrlFromRobots,
36
+ parseSitemap,
37
+ parseSitemapIndex
38
+ };
38
39
  //# sourceMappingURL=sitemap.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"sitemap.js","sourceRoot":"","sources":["../../src/crawl/sitemap.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,4EAA4E;IAC5E,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC;QAAE,OAAO,EAAE,CAAC;IAE7C,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,CAAC;IAElE,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,UAAU,GAAG,GAAG,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC;IAC/D,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,GAAG,EAAE,CAAC;YACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC3C,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC;QAAE,OAAO,EAAE,CAAC;IAE9C,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,UAAU,GAAG,GAAG,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC;IAC/D,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,2BAA2B,CAAC,SAAiB;IAC3D,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAEpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAC9C,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
1
+ {"version":3,"sources":["../../src/crawl/sitemap.ts"],"sourcesContent":["export function parseSitemap(xml: string): string[] {\n // A sitemapindex document should be parsed with parseSitemapIndex, not here\n if (xml.includes('<sitemapindex')) return [];\n\n if (!xml.includes('<urlset') && !xml.includes('<loc>')) return [];\n\n const urls: string[] = [];\n const locMatches = xml.matchAll(/<loc>\\s*([^<]+?)\\s*<\\/loc>/g);\n for (const match of locMatches) {\n const url = match[1].trim();\n if (url) {\n urls.push(url);\n }\n }\n\n return urls;\n}\n\nexport function parseSitemapIndex(xml: string): string[] {\n if (!xml.includes('<sitemapindex')) return [];\n\n const urls: string[] = [];\n const locMatches = xml.matchAll(/<loc>\\s*([^<]+?)\\s*<\\/loc>/g);\n for (const match of locMatches) {\n urls.push(match[1].trim());\n }\n return urls;\n}\n\nexport function extractSitemapUrlFromRobots(robotsTxt: string): string[] {\n const urls: string[] = [];\n const lines = robotsTxt.split('\\n');\n\n for (const line of lines) {\n const match = line.match(/^sitemap:\\s*(.+)/i);\n if (match) {\n urls.push(match[1].trim());\n }\n }\n\n return urls;\n}\n"],"mappings":"AAAO,SAAS,aAAa,KAAuB;AAElD,MAAI,IAAI,SAAS,eAAe,EAAG,QAAO,CAAC;AAE3C,MAAI,CAAC,IAAI,SAAS,SAAS,KAAK,CAAC,IAAI,SAAS,OAAO,EAAG,QAAO,CAAC;AAEhE,QAAM,OAAiB,CAAC;AACxB,QAAM,aAAa,IAAI,SAAS,6BAA6B;AAC7D,aAAW,SAAS,YAAY;AAC9B,UAAM,MAAM,MAAM,CAAC,EAAE,KAAK;AAC1B,QAAI,KAAK;AACP,WAAK,KAAK,GAAG;AAAA,IACf;AAAA,EACF;AAEA,SAAO;AACT;AAEO,SAAS,kBAAkB,KAAuB;AACvD,MAAI,CAAC,IAAI,SAAS,eAAe,EAAG,QAAO,CAAC;AAE5C,QAAM,OAAiB,CAAC;AACxB,QAAM,aAAa,IAAI,SAAS,6BAA6B;AAC7D,aAAW,SAAS,YAAY;AAC9B,SAAK,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAAA,EAC3B;AACA,SAAO;AACT;AAEO,SAAS,4BAA4B,WAA6B;AACvE,QAAM,OAAiB,CAAC;AACxB,QAAM,QAAQ,UAAU,MAAM,IAAI;AAElC,aAAW,QAAQ,OAAO;AACxB,UAAM,QAAQ,KAAK,MAAM,mBAAmB;AAC5C,QAAI,OAAO;AACT,WAAK,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAAA,IAC3B;AAAA,EACF;AAEA,SAAO;AACT;","names":[]}
@@ -1,3 +1,4 @@
1
+ export declare function canonicalForCrawl(url: string): string;
1
2
  export declare function isPrivateUrl(url: string): boolean;
2
3
  export declare function matchesPatterns(url: string, includePatterns: string[] | undefined, excludePatterns: string[] | undefined): boolean;
3
4
  //# sourceMappingURL=url-utils.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"url-utils.d.ts","sourceRoot":"","sources":["../../src/crawl/url-utils.ts"],"names":[],"mappings":"AAAA,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAgCjD;AAED,wBAAgB,eAAe,CAC7B,GAAG,EAAE,MAAM,EACX,eAAe,EAAE,MAAM,EAAE,GAAG,SAAS,EACrC,eAAe,EAAE,MAAM,EAAE,GAAG,SAAS,GACpC,OAAO,CAYT"}
1
+ {"version":3,"file":"url-utils.d.ts","sourceRoot":"","sources":["../../src/crawl/url-utils.ts"],"names":[],"mappings":"AAEA,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAWrD;AAED,wBAAgB,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAgCjD;AAED,wBAAgB,eAAe,CAC7B,GAAG,EAAE,MAAM,EACX,eAAe,EAAE,MAAM,EAAE,GAAG,SAAS,EACrC,eAAe,EAAE,MAAM,EAAE,GAAG,SAAS,GACpC,OAAO,CAYT"}
@@ -1,41 +1,53 @@
1
- export function isPrivateUrl(url) {
2
- const parsed = new URL(url);
3
- const hostname = parsed.hostname.replace(/^\[|\]$/g, ''); // strip IPv6 brackets
4
- if (hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '::1' || hostname === '0.0.0.0') {
5
- return true;
6
- }
7
- if (hostname.endsWith('.local')) {
8
- return true;
9
- }
10
- // 10.x.x.x
11
- if (hostname.startsWith('10.')) {
12
- return true;
13
- }
14
- // 192.168.x.x
15
- if (hostname.startsWith('192.168.')) {
16
- return true;
17
- }
18
- // 172.16.0.0/12 (172.16.x.x – 172.31.x.x)
19
- if (hostname.startsWith('172.')) {
20
- const parts = hostname.split('.');
21
- const second = parseInt(parts[1], 10);
22
- if (second >= 16 && second <= 31) {
23
- return true;
24
- }
25
- }
26
- return false;
1
+ function canonicalForCrawl(url) {
2
+ try {
3
+ const u = new URL(url);
4
+ u.hash = "";
5
+ let pathname = u.pathname;
6
+ if (pathname.length > 1 && pathname.endsWith("/")) pathname = pathname.slice(0, -1);
7
+ u.pathname = pathname;
8
+ return u.toString();
9
+ } catch {
10
+ return url;
11
+ }
27
12
  }
28
- export function matchesPatterns(url, includePatterns, excludePatterns) {
29
- if (includePatterns && includePatterns.length > 0) {
30
- const matches = includePatterns.some((p) => new RegExp(p).test(url));
31
- if (!matches)
32
- return false;
33
- }
34
- if (excludePatterns && excludePatterns.length > 0) {
35
- const excluded = excludePatterns.some((p) => new RegExp(p).test(url));
36
- if (excluded)
37
- return false;
38
- }
13
+ function isPrivateUrl(url) {
14
+ const parsed = new URL(url);
15
+ const hostname = parsed.hostname.replace(/^\[|\]$/g, "");
16
+ if (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1" || hostname === "0.0.0.0") {
17
+ return true;
18
+ }
19
+ if (hostname.endsWith(".local")) {
20
+ return true;
21
+ }
22
+ if (hostname.startsWith("10.")) {
39
23
  return true;
24
+ }
25
+ if (hostname.startsWith("192.168.")) {
26
+ return true;
27
+ }
28
+ if (hostname.startsWith("172.")) {
29
+ const parts = hostname.split(".");
30
+ const second = parseInt(parts[1], 10);
31
+ if (second >= 16 && second <= 31) {
32
+ return true;
33
+ }
34
+ }
35
+ return false;
36
+ }
37
+ function matchesPatterns(url, includePatterns, excludePatterns) {
38
+ if (includePatterns && includePatterns.length > 0) {
39
+ const matches = includePatterns.some((p) => new RegExp(p).test(url));
40
+ if (!matches) return false;
41
+ }
42
+ if (excludePatterns && excludePatterns.length > 0) {
43
+ const excluded = excludePatterns.some((p) => new RegExp(p).test(url));
44
+ if (excluded) return false;
45
+ }
46
+ return true;
40
47
  }
48
+ export {
49
+ canonicalForCrawl,
50
+ isPrivateUrl,
51
+ matchesPatterns
52
+ };
41
53
  //# sourceMappingURL=url-utils.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"url-utils.js","sourceRoot":"","sources":["../../src/crawl/url-utils.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAC5B,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,CAAC,sBAAsB;IAEhF,IAAI,QAAQ,KAAK,WAAW,IAAI,QAAQ,KAAK,WAAW,IAAI,QAAQ,KAAK,KAAK,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QACzG,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,WAAW;IACX,IAAI,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,cAAc;IACd,IAAI,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0CAA0C;IAC1C,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtC,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,GAAW,EACX,eAAqC,EACrC,eAAqC;IAErC,IAAI,eAAe,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClD,MAAM,OAAO,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QACrE,IAAI,CAAC,OAAO;YAAE,OAAO,KAAK,CAAC;IAC7B,CAAC;IAED,IAAI,eAAe,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QACtE,IAAI,QAAQ;YAAE,OAAO,KAAK,CAAC;IAC7B,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
1
+ {"version":3,"sources":["../../src/crawl/url-utils.ts"],"sourcesContent":["// Canonical form for visited-set comparison — drops fragments and the\n// trailing slash so /docs, /docs/, and /docs#anchor are treated as one page.\nexport function canonicalForCrawl(url: string): string {\n try {\n const u = new URL(url);\n u.hash = '';\n let pathname = u.pathname;\n if (pathname.length > 1 && pathname.endsWith('/')) pathname = pathname.slice(0, -1);\n u.pathname = pathname;\n return u.toString();\n } catch {\n return url;\n }\n}\n\nexport function isPrivateUrl(url: string): boolean {\n const parsed = new URL(url);\n const hostname = parsed.hostname.replace(/^\\[|\\]$/g, ''); // strip IPv6 brackets\n\n if (hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '::1' || hostname === '0.0.0.0') {\n return true;\n }\n\n if (hostname.endsWith('.local')) {\n return true;\n }\n\n // 10.x.x.x\n if (hostname.startsWith('10.')) {\n return true;\n }\n\n // 192.168.x.x\n if (hostname.startsWith('192.168.')) {\n return true;\n }\n\n // 172.16.0.0/12 (172.16.x.x – 172.31.x.x)\n if (hostname.startsWith('172.')) {\n const parts = hostname.split('.');\n const second = parseInt(parts[1], 10);\n if (second >= 16 && second <= 31) {\n return true;\n }\n }\n\n return false;\n}\n\nexport function matchesPatterns(\n url: string,\n includePatterns: string[] | undefined,\n excludePatterns: string[] | undefined,\n): boolean {\n if (includePatterns && includePatterns.length > 0) {\n const matches = includePatterns.some((p) => new RegExp(p).test(url));\n if (!matches) return false;\n }\n\n if (excludePatterns && excludePatterns.length > 0) {\n const excluded = excludePatterns.some((p) => new RegExp(p).test(url));\n if (excluded) return false;\n }\n\n return 
true;\n}\n"],"mappings":"AAEO,SAAS,kBAAkB,KAAqB;AACrD,MAAI;AACF,UAAM,IAAI,IAAI,IAAI,GAAG;AACrB,MAAE,OAAO;AACT,QAAI,WAAW,EAAE;AACjB,QAAI,SAAS,SAAS,KAAK,SAAS,SAAS,GAAG,EAAG,YAAW,SAAS,MAAM,GAAG,EAAE;AAClF,MAAE,WAAW;AACb,WAAO,EAAE,SAAS;AAAA,EACpB,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,SAAS,aAAa,KAAsB;AACjD,QAAM,SAAS,IAAI,IAAI,GAAG;AAC1B,QAAM,WAAW,OAAO,SAAS,QAAQ,YAAY,EAAE;AAEvD,MAAI,aAAa,eAAe,aAAa,eAAe,aAAa,SAAS,aAAa,WAAW;AACxG,WAAO;AAAA,EACT;AAEA,MAAI,SAAS,SAAS,QAAQ,GAAG;AAC/B,WAAO;AAAA,EACT;AAGA,MAAI,SAAS,WAAW,KAAK,GAAG;AAC9B,WAAO;AAAA,EACT;AAGA,MAAI,SAAS,WAAW,UAAU,GAAG;AACnC,WAAO;AAAA,EACT;AAGA,MAAI,SAAS,WAAW,MAAM,GAAG;AAC/B,UAAM,QAAQ,SAAS,MAAM,GAAG;AAChC,UAAM,SAAS,SAAS,MAAM,CAAC,GAAG,EAAE;AACpC,QAAI,UAAU,MAAM,UAAU,IAAI;AAChC,aAAO;AAAA,IACT;AAAA,EACF;AAEA,SAAO;AACT;AAEO,SAAS,gBACd,KACA,iBACA,iBACS;AACT,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,UAAM,UAAU,gBAAgB,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,EAAE,KAAK,GAAG,CAAC;AACnE,QAAI,CAAC,QAAS,QAAO;AAAA,EACvB;AAEA,MAAI,mBAAmB,gBAAgB,SAAS,GAAG;AACjD,UAAM,WAAW,gBAAgB,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,EAAE,KAAK,GAAG,CAAC;AACpE,QAAI,SAAU,QAAO;AAAA,EACvB;AAEA,SAAO;AACT;","names":[]}
@@ -0,0 +1,16 @@
1
+ import type { BackendStatus } from '../server/backend-status.js';
2
+ import type { MultiBrowserPool } from '../fetch/browser-pool.js';
3
+ export interface HealthProbeInput {
4
+ backendStatus: BackendStatus | null;
5
+ browserPool: MultiBrowserPool | null;
6
+ startedAt: number;
7
+ }
8
+ export interface HealthReport {
9
+ status: 'healthy' | 'degraded' | 'down';
10
+ searxng: 'active' | 'unavailable' | 'not_initialized';
11
+ browsers: 'ready' | 'not_initialized';
12
+ cache: 'active' | 'not_initialized';
13
+ uptime_seconds: number;
14
+ }
15
+ export declare function probeHealth(input: HealthProbeInput): HealthReport;
16
+ //# sourceMappingURL=health-check.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"health-check.d.ts","sourceRoot":"","sources":["../../src/daemon/health-check.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAEjE,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACrC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,SAAS,GAAG,UAAU,GAAG,MAAM,CAAC;IACxC,OAAO,EAAE,QAAQ,GAAG,aAAa,GAAG,iBAAiB,CAAC;IACtD,QAAQ,EAAE,OAAO,GAAG,iBAAiB,CAAC;IACtC,KAAK,EAAE,QAAQ,GAAG,iBAAiB,CAAC;IACpC,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,gBAAgB,GAAG,YAAY,CAmCjE"}
@@ -0,0 +1,33 @@
1
+ function probeHealth(input) {
2
+ const uptimeMs = Date.now() - input.startedAt;
3
+ const uptimeSeconds = Math.round(uptimeMs / 1e3);
4
+ let searxng;
5
+ if (input.backendStatus === null) {
6
+ searxng = "not_initialized";
7
+ } else if (input.backendStatus.isActive) {
8
+ searxng = "active";
9
+ } else {
10
+ searxng = "unavailable";
11
+ }
12
+ const browsers = input.browserPool ? "ready" : "not_initialized";
13
+ const cache = "active";
14
+ let status;
15
+ if (searxng === "active" && browsers === "ready") {
16
+ status = "healthy";
17
+ } else if (browsers === "not_initialized" && searxng !== "active") {
18
+ status = "down";
19
+ } else {
20
+ status = "degraded";
21
+ }
22
+ return {
23
+ status,
24
+ searxng,
25
+ browsers,
26
+ cache,
27
+ uptime_seconds: uptimeSeconds
28
+ };
29
+ }
30
+ export {
31
+ probeHealth
32
+ };
33
+ //# sourceMappingURL=health-check.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/daemon/health-check.ts"],"sourcesContent":["import type { BackendStatus } from '../server/backend-status.js';\nimport type { MultiBrowserPool } from '../fetch/browser-pool.js';\n\nexport interface HealthProbeInput {\n backendStatus: BackendStatus | null;\n browserPool: MultiBrowserPool | null;\n startedAt: number;\n}\n\nexport interface HealthReport {\n status: 'healthy' | 'degraded' | 'down';\n searxng: 'active' | 'unavailable' | 'not_initialized';\n browsers: 'ready' | 'not_initialized';\n cache: 'active' | 'not_initialized';\n uptime_seconds: number;\n}\n\nexport function probeHealth(input: HealthProbeInput): HealthReport {\n const uptimeMs = Date.now() - input.startedAt;\n const uptimeSeconds = Math.round(uptimeMs / 1000);\n\n let searxng: HealthReport['searxng'];\n if (input.backendStatus === null) {\n searxng = 'not_initialized';\n } else if (input.backendStatus.isActive) {\n searxng = 'active';\n } else {\n searxng = 'unavailable';\n }\n\n const browsers: HealthReport['browsers'] = input.browserPool\n ? 
'ready'\n : 'not_initialized';\n\n const cache: HealthReport['cache'] = 'active';\n\n let status: HealthReport['status'];\n if (searxng === 'active' && browsers === 'ready') {\n status = 'healthy';\n } else if (browsers === 'not_initialized' && searxng !== 'active') {\n status = 'down';\n } else {\n status = 'degraded';\n }\n\n return {\n status,\n searxng,\n browsers,\n cache,\n uptime_seconds: uptimeSeconds,\n };\n}\n"],"mappings":"AAiBO,SAAS,YAAY,OAAuC;AACjE,QAAM,WAAW,KAAK,IAAI,IAAI,MAAM;AACpC,QAAM,gBAAgB,KAAK,MAAM,WAAW,GAAI;AAEhD,MAAI;AACJ,MAAI,MAAM,kBAAkB,MAAM;AAChC,cAAU;AAAA,EACZ,WAAW,MAAM,cAAc,UAAU;AACvC,cAAU;AAAA,EACZ,OAAO;AACL,cAAU;AAAA,EACZ;AAEA,QAAM,WAAqC,MAAM,cAC7C,UACA;AAEJ,QAAM,QAA+B;AAErC,MAAI;AACJ,MAAI,YAAY,YAAY,aAAa,SAAS;AAChD,aAAS;AAAA,EACX,WAAW,aAAa,qBAAqB,YAAY,UAAU;AACjE,aAAS;AAAA,EACX,OAAO;AACL,aAAS;AAAA,EACX;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,gBAAgB;AAAA,EAClB;AACF;","names":[]}
@@ -0,0 +1,26 @@
1
+ export interface DaemonOptions {
2
+ port: number;
3
+ host: string;
4
+ }
5
+ export declare class DaemonHttpServer {
6
+ private httpServer;
7
+ private subsystems;
8
+ private startedAt;
9
+ private stopped;
10
+ private sessions;
11
+ private sseSessions;
12
+ private readonly port;
13
+ private readonly host;
14
+ constructor(options: DaemonOptions);
15
+ start(): Promise<string>;
16
+ private handleRequest;
17
+ private handleHealthRequest;
18
+ private handleStreamableHttpRequest;
19
+ private handleStreamableHttpGet;
20
+ private handleStreamableHttpDelete;
21
+ private handleSseRequest;
22
+ private handleSseMessageRequest;
23
+ private readJsonBody;
24
+ stop(): Promise<void>;
25
+ }
26
+ //# sourceMappingURL=http-server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"http-server.d.ts","sourceRoot":"","sources":["../../src/daemon/http-server.ts"],"names":[],"mappings":"AAYA,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,QAAQ,CAAmF;IACnG,OAAO,CAAC,WAAW,CAAwE;IAC3F,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;gBAElB,OAAO,EAAE,aAAa;IAK5B,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;YA4ChB,aAAa;IAkC3B,OAAO,CAAC,mBAAmB;YAkBb,2BAA2B;YAuD3B,uBAAuB;YAWvB,0BAA0B;YAW1B,gBAAgB;YA+BhB,uBAAuB;IAuBrC,OAAO,CAAC,YAAY;IAwBd,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAwC5B"}