@staticn0va/wigolo 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (982)
  1. package/LICENSE +1 -1
  2. package/README.md +195 -73
  3. package/SKILL.md +382 -0
  4. package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
  5. package/assets/blocks/claude-code/wigolo-command.md +40 -0
  6. package/assets/blocks/cursor/wigolo.mdc +46 -0
  7. package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
  8. package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
  9. package/assets/skills/wigolo/SKILL.md +50 -0
  10. package/assets/skills/wigolo/rules/cache-first.md +30 -0
  11. package/assets/skills/wigolo/rules/synthesis.md +43 -0
  12. package/assets/skills/wigolo-agent/SKILL.md +73 -0
  13. package/assets/skills/wigolo-crawl/SKILL.md +60 -0
  14. package/assets/skills/wigolo-extract/SKILL.md +59 -0
  15. package/assets/skills/wigolo-fetch/SKILL.md +65 -0
  16. package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
  17. package/assets/skills/wigolo-research/SKILL.md +77 -0
  18. package/assets/skills/wigolo-search/SKILL.md +78 -0
  19. package/dist/agent/executor.d.ts +33 -0
  20. package/dist/agent/executor.d.ts.map +1 -0
  21. package/dist/agent/executor.js +233 -0
  22. package/dist/agent/executor.js.map +1 -0
  23. package/dist/agent/pipeline.d.ts +5 -0
  24. package/dist/agent/pipeline.d.ts.map +1 -0
  25. package/dist/agent/pipeline.js +208 -0
  26. package/dist/agent/pipeline.js.map +1 -0
  27. package/dist/agent/planner.d.ts +13 -0
  28. package/dist/agent/planner.d.ts.map +1 -0
  29. package/dist/agent/planner.js +271 -0
  30. package/dist/agent/planner.js.map +1 -0
  31. package/dist/agent/relevance.d.ts +15 -0
  32. package/dist/agent/relevance.d.ts.map +1 -0
  33. package/dist/agent/relevance.js +60 -0
  34. package/dist/agent/relevance.js.map +1 -0
  35. package/dist/cache/backfill-embeddings.d.ts +23 -0
  36. package/dist/cache/backfill-embeddings.d.ts.map +1 -0
  37. package/dist/cache/backfill-embeddings.js +105 -0
  38. package/dist/cache/backfill-embeddings.js.map +1 -0
  39. package/dist/cache/change-detector.d.ts +7 -0
  40. package/dist/cache/change-detector.d.ts.map +1 -0
  41. package/dist/cache/change-detector.js +43 -0
  42. package/dist/cache/change-detector.js.map +1 -0
  43. package/dist/cache/db.d.ts +1 -0
  44. package/dist/cache/db.d.ts.map +1 -1
  45. package/dist/cache/db.js +94 -22
  46. package/dist/cache/db.js.map +1 -1
  47. package/dist/cache/diff-summary.d.ts +2 -0
  48. package/dist/cache/diff-summary.d.ts.map +1 -0
  49. package/dist/cache/diff-summary.js +82 -0
  50. package/dist/cache/diff-summary.js.map +1 -0
  51. package/dist/cache/migrations/runner.d.ts +29 -0
  52. package/dist/cache/migrations/runner.d.ts.map +1 -0
  53. package/dist/cache/migrations/runner.js +147 -0
  54. package/dist/cache/migrations/runner.js.map +1 -0
  55. package/dist/cache/sqlite-vec-store.d.ts +42 -0
  56. package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
  57. package/dist/cache/sqlite-vec-store.js +176 -0
  58. package/dist/cache/sqlite-vec-store.js.map +1 -0
  59. package/dist/cache/store.d.ts +46 -1
  60. package/dist/cache/store.d.ts.map +1 -1
  61. package/dist/cache/store.js +362 -168
  62. package/dist/cache/store.js.map +1 -1
  63. package/dist/cli/agents/antigravity.d.ts +20 -0
  64. package/dist/cli/agents/antigravity.d.ts.map +1 -0
  65. package/dist/cli/agents/antigravity.js +49 -0
  66. package/dist/cli/agents/antigravity.js.map +1 -0
  67. package/dist/cli/agents/claude-code.d.ts +25 -0
  68. package/dist/cli/agents/claude-code.d.ts.map +1 -0
  69. package/dist/cli/agents/claude-code.js +111 -0
  70. package/dist/cli/agents/claude-code.js.map +1 -0
  71. package/dist/cli/agents/cursor.d.ts +21 -0
  72. package/dist/cli/agents/cursor.d.ts.map +1 -0
  73. package/dist/cli/agents/cursor.js +58 -0
  74. package/dist/cli/agents/cursor.js.map +1 -0
  75. package/dist/cli/agents/gemini-cli.d.ts +21 -0
  76. package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
  77. package/dist/cli/agents/gemini-cli.js +55 -0
  78. package/dist/cli/agents/gemini-cli.js.map +1 -0
  79. package/dist/cli/agents/registry.d.ts +21 -0
  80. package/dist/cli/agents/registry.d.ts.map +1 -0
  81. package/dist/cli/agents/registry.js +27 -0
  82. package/dist/cli/agents/registry.js.map +1 -0
  83. package/dist/cli/agents/utils.d.ts +26 -0
  84. package/dist/cli/agents/utils.d.ts.map +1 -0
  85. package/dist/cli/agents/utils.js +136 -0
  86. package/dist/cli/agents/utils.js.map +1 -0
  87. package/dist/cli/agents/vscode.d.ts +21 -0
  88. package/dist/cli/agents/vscode.d.ts.map +1 -0
  89. package/dist/cli/agents/vscode.js +62 -0
  90. package/dist/cli/agents/vscode.js.map +1 -0
  91. package/dist/cli/auth.d.ts +2 -0
  92. package/dist/cli/auth.d.ts.map +1 -0
  93. package/dist/cli/auth.js +94 -0
  94. package/dist/cli/auth.js.map +1 -0
  95. package/dist/cli/backfill.d.ts +2 -0
  96. package/dist/cli/backfill.d.ts.map +1 -0
  97. package/dist/cli/backfill.js +58 -0
  98. package/dist/cli/backfill.js.map +1 -0
  99. package/dist/cli/daemon.d.ts +6 -1
  100. package/dist/cli/daemon.d.ts.map +1 -1
  101. package/dist/cli/daemon.js +61 -3
  102. package/dist/cli/daemon.js.map +1 -1
  103. package/dist/cli/doctor.d.ts +8 -0
  104. package/dist/cli/doctor.d.ts.map +1 -0
  105. package/dist/cli/doctor.js +318 -0
  106. package/dist/cli/doctor.js.map +1 -0
  107. package/dist/cli/health.d.ts +1 -1
  108. package/dist/cli/health.d.ts.map +1 -1
  109. package/dist/cli/health.js +42 -3
  110. package/dist/cli/health.js.map +1 -1
  111. package/dist/cli/help.d.ts +6 -0
  112. package/dist/cli/help.d.ts.map +1 -0
  113. package/dist/cli/help.js +63 -0
  114. package/dist/cli/help.js.map +1 -0
  115. package/dist/cli/index.d.ts +1 -1
  116. package/dist/cli/index.d.ts.map +1 -1
  117. package/dist/cli/index.js +35 -7
  118. package/dist/cli/index.js.map +1 -1
  119. package/dist/cli/init.d.ts +2 -0
  120. package/dist/cli/init.d.ts.map +1 -0
  121. package/dist/cli/init.js +201 -0
  122. package/dist/cli/init.js.map +1 -0
  123. package/dist/cli/plugin.d.ts +5 -0
  124. package/dist/cli/plugin.d.ts.map +1 -0
  125. package/dist/cli/plugin.js +185 -0
  126. package/dist/cli/plugin.js.map +1 -0
  127. package/dist/cli/setup-mcp.d.ts +2 -0
  128. package/dist/cli/setup-mcp.d.ts.map +1 -0
  129. package/dist/cli/setup-mcp.js +114 -0
  130. package/dist/cli/setup-mcp.js.map +1 -0
  131. package/dist/cli/shell.d.ts +2 -0
  132. package/dist/cli/shell.d.ts.map +1 -0
  133. package/dist/cli/shell.js +86 -0
  134. package/dist/cli/shell.js.map +1 -0
  135. package/dist/cli/status.d.ts +2 -0
  136. package/dist/cli/status.d.ts.map +1 -0
  137. package/dist/cli/status.js +31 -0
  138. package/dist/cli/status.js.map +1 -0
  139. package/dist/cli/telemetry.d.ts +10 -0
  140. package/dist/cli/telemetry.d.ts.map +1 -0
  141. package/dist/cli/telemetry.js +56 -0
  142. package/dist/cli/telemetry.js.map +1 -0
  143. package/dist/cli/tui/agents-types.d.ts +28 -0
  144. package/dist/cli/tui/agents-types.d.ts.map +1 -0
  145. package/dist/cli/tui/agents-types.js +1 -0
  146. package/dist/cli/tui/agents-types.js.map +1 -0
  147. package/dist/cli/tui/agents.d.ts +11 -0
  148. package/dist/cli/tui/agents.d.ts.map +1 -0
  149. package/dist/cli/tui/agents.js +93 -0
  150. package/dist/cli/tui/agents.js.map +1 -0
  151. package/dist/cli/tui/banner.d.ts +3 -0
  152. package/dist/cli/tui/banner.d.ts.map +1 -0
  153. package/dist/cli/tui/banner.js +30 -0
  154. package/dist/cli/tui/banner.js.map +1 -0
  155. package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
  156. package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
  157. package/dist/cli/tui/components/AgentSelect.js +116 -0
  158. package/dist/cli/tui/components/AgentSelect.js.map +1 -0
  159. package/dist/cli/tui/components/Banner.d.ts +6 -0
  160. package/dist/cli/tui/components/Banner.d.ts.map +1 -0
  161. package/dist/cli/tui/components/Banner.js +25 -0
  162. package/dist/cli/tui/components/Banner.js.map +1 -0
  163. package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
  164. package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
  165. package/dist/cli/tui/components/BrowserSelect.js +19 -0
  166. package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
  167. package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
  168. package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
  169. package/dist/cli/tui/components/InstallProgress.js +67 -0
  170. package/dist/cli/tui/components/InstallProgress.js.map +1 -0
  171. package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
  172. package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
  173. package/dist/cli/tui/components/SkillInstall.js +94 -0
  174. package/dist/cli/tui/components/SkillInstall.js.map +1 -0
  175. package/dist/cli/tui/components/Summary.d.ts +22 -0
  176. package/dist/cli/tui/components/Summary.d.ts.map +1 -0
  177. package/dist/cli/tui/components/Summary.js +135 -0
  178. package/dist/cli/tui/components/Summary.js.map +1 -0
  179. package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
  180. package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
  181. package/dist/cli/tui/components/SystemCheck.js +71 -0
  182. package/dist/cli/tui/components/SystemCheck.js.map +1 -0
  183. package/dist/cli/tui/components/Verification.d.ts +8 -0
  184. package/dist/cli/tui/components/Verification.d.ts.map +1 -0
  185. package/dist/cli/tui/components/Verification.js +63 -0
  186. package/dist/cli/tui/components/Verification.js.map +1 -0
  187. package/dist/cli/tui/config-writer-cli.d.ts +12 -0
  188. package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
  189. package/dist/cli/tui/config-writer-cli.js +39 -0
  190. package/dist/cli/tui/config-writer-cli.js.map +1 -0
  191. package/dist/cli/tui/config-writer-json.d.ts +16 -0
  192. package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
  193. package/dist/cli/tui/config-writer-json.js +86 -0
  194. package/dist/cli/tui/config-writer-json.js.map +1 -0
  195. package/dist/cli/tui/config-writer-toml.d.ts +16 -0
  196. package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
  197. package/dist/cli/tui/config-writer-toml.js +83 -0
  198. package/dist/cli/tui/config-writer-toml.js.map +1 -0
  199. package/dist/cli/tui/config-writer.d.ts +25 -0
  200. package/dist/cli/tui/config-writer.d.ts.map +1 -0
  201. package/dist/cli/tui/config-writer.js +101 -0
  202. package/dist/cli/tui/config-writer.js.map +1 -0
  203. package/dist/cli/tui/detect-helpers.d.ts +6 -0
  204. package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
  205. package/dist/cli/tui/detect-helpers.js +45 -0
  206. package/dist/cli/tui/detect-helpers.js.map +1 -0
  207. package/dist/cli/tui/extras-prompt.d.ts +7 -0
  208. package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
  209. package/dist/cli/tui/extras-prompt.js +42 -0
  210. package/dist/cli/tui/extras-prompt.js.map +1 -0
  211. package/dist/cli/tui/flags-types.d.ts +19 -0
  212. package/dist/cli/tui/flags-types.d.ts.map +1 -0
  213. package/dist/cli/tui/flags-types.js +23 -0
  214. package/dist/cli/tui/flags-types.js.map +1 -0
  215. package/dist/cli/tui/flags.d.ts +5 -0
  216. package/dist/cli/tui/flags.d.ts.map +1 -0
  217. package/dist/cli/tui/flags.js +132 -0
  218. package/dist/cli/tui/flags.js.map +1 -0
  219. package/dist/cli/tui/format.d.ts +14 -0
  220. package/dist/cli/tui/format.d.ts.map +1 -0
  221. package/dist/cli/tui/format.js +37 -0
  222. package/dist/cli/tui/format.js.map +1 -0
  223. package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
  224. package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
  225. package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
  226. package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
  227. package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
  228. package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
  229. package/dist/cli/tui/hooks/useInstall.js +90 -0
  230. package/dist/cli/tui/hooks/useInstall.js.map +1 -0
  231. package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
  232. package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
  233. package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
  234. package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
  235. package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
  236. package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
  237. package/dist/cli/tui/hooks/useVerify.js +71 -0
  238. package/dist/cli/tui/hooks/useVerify.js.map +1 -0
  239. package/dist/cli/tui/ink-init.d.ts +2 -0
  240. package/dist/cli/tui/ink-init.d.ts.map +1 -0
  241. package/dist/cli/tui/ink-init.js +198 -0
  242. package/dist/cli/tui/ink-init.js.map +1 -0
  243. package/dist/cli/tui/reporter-auto.d.ts +7 -0
  244. package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
  245. package/dist/cli/tui/reporter-auto.js +15 -0
  246. package/dist/cli/tui/reporter-auto.js.map +1 -0
  247. package/dist/cli/tui/reporter.d.ts +26 -0
  248. package/dist/cli/tui/reporter.d.ts.map +1 -0
  249. package/dist/cli/tui/reporter.js +32 -0
  250. package/dist/cli/tui/reporter.js.map +1 -0
  251. package/dist/cli/tui/run-command.d.ts +14 -0
  252. package/dist/cli/tui/run-command.d.ts.map +1 -0
  253. package/dist/cli/tui/run-command.js +72 -0
  254. package/dist/cli/tui/run-command.js.map +1 -0
  255. package/dist/cli/tui/select-agents.d.ts +6 -0
  256. package/dist/cli/tui/select-agents.d.ts.map +1 -0
  257. package/dist/cli/tui/select-agents.js +32 -0
  258. package/dist/cli/tui/select-agents.js.map +1 -0
  259. package/dist/cli/tui/status-agents.d.ts +11 -0
  260. package/dist/cli/tui/status-agents.d.ts.map +1 -0
  261. package/dist/cli/tui/status-agents.js +53 -0
  262. package/dist/cli/tui/status-agents.js.map +1 -0
  263. package/dist/cli/tui/status-cache.d.ts +6 -0
  264. package/dist/cli/tui/status-cache.d.ts.map +1 -0
  265. package/dist/cli/tui/status-cache.js +39 -0
  266. package/dist/cli/tui/status-cache.js.map +1 -0
  267. package/dist/cli/tui/status-format.d.ts +14 -0
  268. package/dist/cli/tui/status-format.d.ts.map +1 -0
  269. package/dist/cli/tui/status-format.js +41 -0
  270. package/dist/cli/tui/status-format.js.map +1 -0
  271. package/dist/cli/tui/status-python.d.ts +6 -0
  272. package/dist/cli/tui/status-python.d.ts.map +1 -0
  273. package/dist/cli/tui/status-python.js +30 -0
  274. package/dist/cli/tui/status-python.js.map +1 -0
  275. package/dist/cli/tui/system-check.d.ts +24 -0
  276. package/dist/cli/tui/system-check.d.ts.map +1 -0
  277. package/dist/cli/tui/system-check.js +103 -0
  278. package/dist/cli/tui/system-check.js.map +1 -0
  279. package/dist/cli/tui/tui-reporter.d.ts +19 -0
  280. package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
  281. package/dist/cli/tui/tui-reporter.js +95 -0
  282. package/dist/cli/tui/tui-reporter.js.map +1 -0
  283. package/dist/cli/tui/utils/config-writer.d.ts +3 -0
  284. package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
  285. package/dist/cli/tui/utils/config-writer.js +22 -0
  286. package/dist/cli/tui/utils/config-writer.js.map +1 -0
  287. package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
  288. package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
  289. package/dist/cli/tui/utils/suppress-logs.js +11 -0
  290. package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
  291. package/dist/cli/tui/verify-suggestions.d.ts +5 -0
  292. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
  293. package/dist/cli/tui/verify-suggestions.js +20 -0
  294. package/dist/cli/tui/verify-suggestions.js.map +1 -0
  295. package/dist/cli/tui/verify.d.ts +14 -0
  296. package/dist/cli/tui/verify.d.ts.map +1 -0
  297. package/dist/cli/tui/verify.js +101 -0
  298. package/dist/cli/tui/verify.js.map +1 -0
  299. package/dist/cli/tui/version.d.ts +2 -0
  300. package/dist/cli/tui/version.d.ts.map +1 -0
  301. package/dist/cli/tui/version.js +14 -0
  302. package/dist/cli/tui/version.js.map +1 -0
  303. package/dist/cli/uninstall.d.ts +2 -0
  304. package/dist/cli/uninstall.d.ts.map +1 -0
  305. package/dist/cli/uninstall.js +57 -0
  306. package/dist/cli/uninstall.js.map +1 -0
  307. package/dist/cli/warmup.d.ts +10 -2
  308. package/dist/cli/warmup.d.ts.map +1 -1
  309. package/dist/cli/warmup.js +226 -93
  310. package/dist/cli/warmup.js.map +1 -1
  311. package/dist/config.d.ts +28 -2
  312. package/dist/config.d.ts.map +1 -1
  313. package/dist/config.js +106 -56
  314. package/dist/config.js.map +1 -1
  315. package/dist/crawl/crawler.d.ts +6 -0
  316. package/dist/crawl/crawler.d.ts.map +1 -1
  317. package/dist/crawl/crawler.js +210 -209
  318. package/dist/crawl/crawler.js.map +1 -1
  319. package/dist/crawl/dedup.d.ts +1 -0
  320. package/dist/crawl/dedup.d.ts.map +1 -1
  321. package/dist/crawl/dedup.js +124 -81
  322. package/dist/crawl/dedup.js.map +1 -1
  323. package/dist/crawl/etag-incremental.d.ts +43 -0
  324. package/dist/crawl/etag-incremental.d.ts.map +1 -0
  325. package/dist/crawl/etag-incremental.js +94 -0
  326. package/dist/crawl/etag-incremental.js.map +1 -0
  327. package/dist/crawl/index-to-vec.d.ts +10 -0
  328. package/dist/crawl/index-to-vec.d.ts.map +1 -0
  329. package/dist/crawl/index-to-vec.js +44 -0
  330. package/dist/crawl/index-to-vec.js.map +1 -0
  331. package/dist/crawl/mapper.js +136 -164
  332. package/dist/crawl/mapper.js.map +1 -1
  333. package/dist/crawl/rate-limiter.js +63 -66
  334. package/dist/crawl/rate-limiter.js.map +1 -1
  335. package/dist/crawl/robots.js +58 -57
  336. package/dist/crawl/robots.js.map +1 -1
  337. package/dist/crawl/sitemap-first.d.ts +12 -0
  338. package/dist/crawl/sitemap-first.d.ts.map +1 -0
  339. package/dist/crawl/sitemap-first.js +47 -0
  340. package/dist/crawl/sitemap-first.js.map +1 -0
  341. package/dist/crawl/sitemap.js +33 -32
  342. package/dist/crawl/sitemap.js.map +1 -1
  343. package/dist/crawl/url-utils.d.ts +1 -0
  344. package/dist/crawl/url-utils.d.ts.map +1 -1
  345. package/dist/crawl/url-utils.js +49 -37
  346. package/dist/crawl/url-utils.js.map +1 -1
  347. package/dist/daemon/health-check.d.ts +16 -0
  348. package/dist/daemon/health-check.d.ts.map +1 -0
  349. package/dist/daemon/health-check.js +33 -0
  350. package/dist/daemon/health-check.js.map +1 -0
  351. package/dist/daemon/http-server.d.ts +26 -0
  352. package/dist/daemon/http-server.d.ts.map +1 -0
  353. package/dist/daemon/http-server.js +275 -0
  354. package/dist/daemon/http-server.js.map +1 -0
  355. package/dist/daemon/proxy.d.ts +10 -0
  356. package/dist/daemon/proxy.d.ts.map +1 -0
  357. package/dist/daemon/proxy.js +93 -0
  358. package/dist/daemon/proxy.js.map +1 -0
  359. package/dist/embedding/embed.d.ts +59 -0
  360. package/dist/embedding/embed.d.ts.map +1 -0
  361. package/dist/embedding/embed.js +233 -0
  362. package/dist/embedding/embed.js.map +1 -0
  363. package/dist/embedding/fastembed-provider.d.ts +19 -0
  364. package/dist/embedding/fastembed-provider.d.ts.map +1 -0
  365. package/dist/embedding/fastembed-provider.js +51 -0
  366. package/dist/embedding/fastembed-provider.js.map +1 -0
  367. package/dist/embedding/key-terms.d.ts +12 -0
  368. package/dist/embedding/key-terms.d.ts.map +1 -0
  369. package/dist/embedding/key-terms.js +234 -0
  370. package/dist/embedding/key-terms.js.map +1 -0
  371. package/dist/extraction/boilerplate.d.ts +15 -0
  372. package/dist/extraction/boilerplate.d.ts.map +1 -0
  373. package/dist/extraction/boilerplate.js +52 -0
  374. package/dist/extraction/boilerplate.js.map +1 -0
  375. package/dist/extraction/defuddle.d.ts.map +1 -1
  376. package/dist/extraction/defuddle.js +27 -23
  377. package/dist/extraction/defuddle.js.map +1 -1
  378. package/dist/extraction/extract.d.ts.map +1 -1
  379. package/dist/extraction/extract.js +76 -76
  380. package/dist/extraction/extract.js.map +1 -1
  381. package/dist/extraction/jsonld.js +50 -54
  382. package/dist/extraction/jsonld.js.map +1 -1
  383. package/dist/extraction/lang-hints.d.ts +2 -0
  384. package/dist/extraction/lang-hints.d.ts.map +1 -0
  385. package/dist/extraction/lang-hints.js +30 -0
  386. package/dist/extraction/lang-hints.js.map +1 -0
  387. package/dist/extraction/llm-fallback.d.ts +17 -0
  388. package/dist/extraction/llm-fallback.d.ts.map +1 -0
  389. package/dist/extraction/llm-fallback.js +130 -0
  390. package/dist/extraction/llm-fallback.js.map +1 -0
  391. package/dist/extraction/markdown-sanitize.d.ts +2 -0
  392. package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
  393. package/dist/extraction/markdown-sanitize.js +151 -0
  394. package/dist/extraction/markdown-sanitize.js.map +1 -0
  395. package/dist/extraction/markdown.d.ts +11 -0
  396. package/dist/extraction/markdown.d.ts.map +1 -1
  397. package/dist/extraction/markdown.js +195 -91
  398. package/dist/extraction/markdown.js.map +1 -1
  399. package/dist/extraction/pipeline.d.ts +8 -0
  400. package/dist/extraction/pipeline.d.ts.map +1 -1
  401. package/dist/extraction/pipeline.js +57 -91
  402. package/dist/extraction/pipeline.js.map +1 -1
  403. package/dist/extraction/readability.d.ts +1 -1
  404. package/dist/extraction/readability.d.ts.map +1 -1
  405. package/dist/extraction/readability.js +28 -29
  406. package/dist/extraction/readability.js.map +1 -1
  407. package/dist/extraction/schema.d.ts +12 -0
  408. package/dist/extraction/schema.d.ts.map +1 -1
  409. package/dist/extraction/schema.js +135 -72
  410. package/dist/extraction/schema.js.map +1 -1
  411. package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
  412. package/dist/extraction/site-extractors/docs-generic.js +81 -91
  413. package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
  414. package/dist/extraction/site-extractors/github.d.ts.map +1 -1
  415. package/dist/extraction/site-extractors/github.js +87 -95
  416. package/dist/extraction/site-extractors/github.js.map +1 -1
  417. package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
  418. package/dist/extraction/site-extractors/mdn.js +46 -54
  419. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  420. package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
  421. package/dist/extraction/site-extractors/stackoverflow.js +71 -80
  422. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  423. package/dist/extraction/structured-data.d.ts +4 -0
  424. package/dist/extraction/structured-data.d.ts.map +1 -0
  425. package/dist/extraction/structured-data.js +173 -0
  426. package/dist/extraction/structured-data.js.map +1 -0
  427. package/dist/extraction/structured.d.ts +4 -0
  428. package/dist/extraction/structured.d.ts.map +1 -0
  429. package/dist/extraction/structured.js +163 -0
  430. package/dist/extraction/structured.js.map +1 -0
  431. package/dist/extraction/v1/classifier.d.ts +3 -0
  432. package/dist/extraction/v1/classifier.d.ts.map +1 -0
  433. package/dist/extraction/v1/classifier.js +110 -0
  434. package/dist/extraction/v1/classifier.js.map +1 -0
  435. package/dist/extraction/v1/extract-provider.d.ts +16 -0
  436. package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
  437. package/dist/extraction/v1/extract-provider.js +43 -0
  438. package/dist/extraction/v1/extract-provider.js.map +1 -0
  439. package/dist/extraction/v1/local-llm.d.ts +8 -0
  440. package/dist/extraction/v1/local-llm.d.ts.map +1 -0
  441. package/dist/extraction/v1/local-llm.js +58 -0
  442. package/dist/extraction/v1/local-llm.js.map +1 -0
  443. package/dist/extraction/v1/news.d.ts +3 -0
  444. package/dist/extraction/v1/news.d.ts.map +1 -0
  445. package/dist/extraction/v1/news.js +61 -0
  446. package/dist/extraction/v1/news.js.map +1 -0
  447. package/dist/extraction/v1/product.d.ts +3 -0
  448. package/dist/extraction/v1/product.d.ts.map +1 -0
  449. package/dist/extraction/v1/product.js +166 -0
  450. package/dist/extraction/v1/product.js.map +1 -0
  451. package/dist/extraction/v1/recipe.d.ts +3 -0
  452. package/dist/extraction/v1/recipe.d.ts.map +1 -0
  453. package/dist/extraction/v1/recipe.js +136 -0
  454. package/dist/extraction/v1/recipe.js.map +1 -0
  455. package/dist/extraction/v1/routed.d.ts +17 -0
  456. package/dist/extraction/v1/routed.d.ts.map +1 -0
  457. package/dist/extraction/v1/routed.js +68 -0
  458. package/dist/extraction/v1/routed.js.map +1 -0
  459. package/dist/extraction/v1/schemas/Article.d.ts +11 -0
  460. package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
  461. package/dist/extraction/v1/schemas/Article.js +23 -0
  462. package/dist/extraction/v1/schemas/Article.js.map +1 -0
  463. package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
  464. package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
  465. package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
  466. package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
  467. package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
  468. package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
  469. package/dist/extraction/v1/schemas/EventListing.js +122 -0
  470. package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
  471. package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
  472. package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
  473. package/dist/extraction/v1/schemas/Paper.js +156 -0
  474. package/dist/extraction/v1/schemas/Paper.js.map +1 -0
  475. package/dist/extraction/v1/schemas/Product.d.ts +17 -0
  476. package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
  477. package/dist/extraction/v1/schemas/Product.js +149 -0
  478. package/dist/extraction/v1/schemas/Product.js.map +1 -0
  479. package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
  480. package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
  481. package/dist/extraction/v1/schemas/Recipe.js +160 -0
  482. package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
  483. package/dist/extraction/v1/schemas/index.d.ts +13 -0
  484. package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
  485. package/dist/extraction/v1/schemas/index.js +44 -0
  486. package/dist/extraction/v1/schemas/index.js.map +1 -0
  487. package/dist/extraction/v1/site-extractors.d.ts +5 -0
  488. package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
  489. package/dist/extraction/v1/site-extractors.js +31 -0
  490. package/dist/extraction/v1/site-extractors.js.map +1 -0
  491. package/dist/fetch/action-executor.d.ts +28 -0
  492. package/dist/fetch/action-executor.d.ts.map +1 -0
  493. package/dist/fetch/action-executor.js +88 -0
  494. package/dist/fetch/action-executor.js.map +1 -0
  495. package/dist/fetch/auth.d.ts +2 -1
  496. package/dist/fetch/auth.d.ts.map +1 -1
  497. package/dist/fetch/auth.js +56 -26
  498. package/dist/fetch/auth.js.map +1 -1
  499. package/dist/fetch/browser-pool.d.ts +30 -11
  500. package/dist/fetch/browser-pool.d.ts.map +1 -1
  501. package/dist/fetch/browser-pool.js +303 -127
  502. package/dist/fetch/browser-pool.js.map +1 -1
  503. package/dist/fetch/browser-selector.d.ts +17 -0
  504. package/dist/fetch/browser-selector.d.ts.map +1 -0
  505. package/dist/fetch/browser-selector.js +72 -0
  506. package/dist/fetch/browser-selector.js.map +1 -0
  507. package/dist/fetch/browser-types.d.ts +3 -0
  508. package/dist/fetch/browser-types.d.ts.map +1 -0
  509. package/dist/fetch/browser-types.js +45 -0
  510. package/dist/fetch/browser-types.js.map +1 -0
  511. package/dist/fetch/cdp-client.d.ts +9 -0
  512. package/dist/fetch/cdp-client.d.ts.map +1 -0
  513. package/dist/fetch/cdp-client.js +89 -0
  514. package/dist/fetch/cdp-client.js.map +1 -0
  515. package/dist/fetch/content-check.js +39 -46
  516. package/dist/fetch/content-check.js.map +1 -1
  517. package/dist/fetch/http-client.d.ts +4 -0
  518. package/dist/fetch/http-client.d.ts.map +1 -1
  519. package/dist/fetch/http-client.js +147 -128
  520. package/dist/fetch/http-client.js.map +1 -1
  521. package/dist/fetch/lightpanda.d.ts +28 -0
  522. package/dist/fetch/lightpanda.d.ts.map +1 -0
  523. package/dist/fetch/lightpanda.js +174 -0
  524. package/dist/fetch/lightpanda.js.map +1 -0
  525. package/dist/fetch/playwright-tier.d.ts +19 -0
  526. package/dist/fetch/playwright-tier.d.ts.map +1 -0
  527. package/dist/fetch/playwright-tier.js +76 -0
  528. package/dist/fetch/playwright-tier.js.map +1 -0
  529. package/dist/fetch/router.d.ts +49 -3
  530. package/dist/fetch/router.d.ts.map +1 -1
  531. package/dist/fetch/router.js +185 -81
  532. package/dist/fetch/router.js.map +1 -1
  533. package/dist/index.js +97 -17
  534. package/dist/index.js.map +1 -1
  535. package/dist/instructions.d.ts +31 -0
  536. package/dist/instructions.d.ts.map +1 -0
  537. package/dist/instructions.js +245 -0
  538. package/dist/instructions.js.map +1 -0
  539. package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
  540. package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
  541. package/dist/integrations/cloud/llm/anthropic.js +41 -0
  542. package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
  543. package/dist/integrations/cloud/llm/cache.d.ts +5 -0
  544. package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
  545. package/dist/integrations/cloud/llm/cache.js +49 -0
  546. package/dist/integrations/cloud/llm/cache.js.map +1 -0
  547. package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
  548. package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
  549. package/dist/integrations/cloud/llm/gemini.js +37 -0
  550. package/dist/integrations/cloud/llm/gemini.js.map +1 -0
  551. package/dist/integrations/cloud/llm/groq.d.ts +3 -0
  552. package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
  553. package/dist/integrations/cloud/llm/groq.js +74 -0
  554. package/dist/integrations/cloud/llm/groq.js.map +1 -0
  555. package/dist/integrations/cloud/llm/hash.d.ts +3 -0
  556. package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
  557. package/dist/integrations/cloud/llm/hash.js +26 -0
  558. package/dist/integrations/cloud/llm/hash.js.map +1 -0
  559. package/dist/integrations/cloud/llm/openai.d.ts +3 -0
  560. package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
  561. package/dist/integrations/cloud/llm/openai.js +43 -0
  562. package/dist/integrations/cloud/llm/openai.js.map +1 -0
  563. package/dist/integrations/cloud/llm/select.d.ts +5 -0
  564. package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
  565. package/dist/integrations/cloud/llm/select.js +30 -0
  566. package/dist/integrations/cloud/llm/select.js.map +1 -0
  567. package/dist/integrations/cloud/llm/types.d.ts +24 -0
  568. package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
  569. package/dist/integrations/cloud/llm/types.js +1 -0
  570. package/dist/integrations/cloud/llm/types.js.map +1 -0
  571. package/dist/integrations/cloud/llm/validate.d.ts +6 -0
  572. package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
  573. package/dist/integrations/cloud/llm/validate.js +63 -0
  574. package/dist/integrations/cloud/llm/validate.js.map +1 -0
  575. package/dist/logger.d.ts +4 -1
  576. package/dist/logger.d.ts.map +1 -1
  577. package/dist/logger.js +71 -30
  578. package/dist/logger.js.map +1 -1
  579. package/dist/pdf-parse.d.js +1 -0
  580. package/dist/pdf-parse.d.js.map +1 -0
  581. package/dist/plugins/loader.d.ts +20 -0
  582. package/dist/plugins/loader.d.ts.map +1 -0
  583. package/dist/plugins/loader.js +157 -0
  584. package/dist/plugins/loader.js.map +1 -0
  585. package/dist/plugins/registry.d.ts +26 -0
  586. package/dist/plugins/registry.d.ts.map +1 -0
  587. package/dist/plugins/registry.js +71 -0
  588. package/dist/plugins/registry.js.map +1 -0
  589. package/dist/plugins/validate.d.ts +9 -0
  590. package/dist/plugins/validate.d.ts.map +1 -0
  591. package/dist/plugins/validate.js +79 -0
  592. package/dist/plugins/validate.js.map +1 -0
  593. package/dist/providers/embed-provider.d.ts +11 -0
  594. package/dist/providers/embed-provider.d.ts.map +1 -0
  595. package/dist/providers/embed-provider.js +24 -0
  596. package/dist/providers/embed-provider.js.map +1 -0
  597. package/dist/providers/extract-provider.d.ts +23 -0
  598. package/dist/providers/extract-provider.d.ts.map +1 -0
  599. package/dist/providers/extract-provider.js +25 -0
  600. package/dist/providers/extract-provider.js.map +1 -0
  601. package/dist/providers/rerank-provider.d.ts +16 -0
  602. package/dist/providers/rerank-provider.d.ts.map +1 -0
  603. package/dist/providers/rerank-provider.js +28 -0
  604. package/dist/providers/rerank-provider.js.map +1 -0
  605. package/dist/providers/search-provider.d.ts +25 -0
  606. package/dist/providers/search-provider.d.ts.map +1 -0
  607. package/dist/providers/search-provider.js +44 -0
  608. package/dist/providers/search-provider.js.map +1 -0
  609. package/dist/providers/vector-store.d.ts +27 -0
  610. package/dist/providers/vector-store.d.ts.map +1 -0
  611. package/dist/providers/vector-store.js +27 -0
  612. package/dist/providers/vector-store.js.map +1 -0
  613. package/dist/python-env.d.ts +9 -0
  614. package/dist/python-env.d.ts.map +1 -0
  615. package/dist/python-env.js +13 -0
  616. package/dist/python-env.js.map +1 -0
  617. package/dist/repl/commands/agent.d.ts +5 -0
  618. package/dist/repl/commands/agent.d.ts.map +1 -0
  619. package/dist/repl/commands/agent.js +62 -0
  620. package/dist/repl/commands/agent.js.map +1 -0
  621. package/dist/repl/commands/cache.d.ts +4 -0
  622. package/dist/repl/commands/cache.d.ts.map +1 -0
  623. package/dist/repl/commands/cache.js +43 -0
  624. package/dist/repl/commands/cache.js.map +1 -0
  625. package/dist/repl/commands/crawl.d.ts +7 -0
  626. package/dist/repl/commands/crawl.d.ts.map +1 -0
  627. package/dist/repl/commands/crawl.js +44 -0
  628. package/dist/repl/commands/crawl.js.map +1 -0
  629. package/dist/repl/commands/extract.d.ts +5 -0
  630. package/dist/repl/commands/extract.d.ts.map +1 -0
  631. package/dist/repl/commands/extract.js +47 -0
  632. package/dist/repl/commands/extract.js.map +1 -0
  633. package/dist/repl/commands/fetch.d.ts +5 -0
  634. package/dist/repl/commands/fetch.d.ts.map +1 -0
  635. package/dist/repl/commands/fetch.js +67 -0
  636. package/dist/repl/commands/fetch.js.map +1 -0
  637. package/dist/repl/commands/find-similar.d.ts +5 -0
  638. package/dist/repl/commands/find-similar.d.ts.map +1 -0
  639. package/dist/repl/commands/find-similar.js +74 -0
  640. package/dist/repl/commands/find-similar.js.map +1 -0
  641. package/dist/repl/commands/research.d.ts +5 -0
  642. package/dist/repl/commands/research.d.ts.map +1 -0
  643. package/dist/repl/commands/research.js +65 -0
  644. package/dist/repl/commands/research.js.map +1 -0
  645. package/dist/repl/commands/search.d.ts +5 -0
  646. package/dist/repl/commands/search.d.ts.map +1 -0
  647. package/dist/repl/commands/search.js +74 -0
  648. package/dist/repl/commands/search.js.map +1 -0
  649. package/dist/repl/commands/types.d.ts +9 -0
  650. package/dist/repl/commands/types.d.ts.map +1 -0
  651. package/dist/repl/commands/types.js +1 -0
  652. package/dist/repl/commands/types.js.map +1 -0
  653. package/dist/repl/formatters.d.ts +13 -0
  654. package/dist/repl/formatters.d.ts.map +1 -0
  655. package/dist/repl/formatters.js +283 -0
  656. package/dist/repl/formatters.js.map +1 -0
  657. package/dist/repl/parser.d.ts +9 -0
  658. package/dist/repl/parser.d.ts.map +1 -0
  659. package/dist/repl/parser.js +86 -0
  660. package/dist/repl/parser.js.map +1 -0
  661. package/dist/repl/shell.d.ts +8 -0
  662. package/dist/repl/shell.d.ts.map +1 -0
  663. package/dist/repl/shell.js +184 -0
  664. package/dist/repl/shell.js.map +1 -0
  665. package/dist/research/branch-exploration.d.ts +14 -0
  666. package/dist/research/branch-exploration.d.ts.map +1 -0
  667. package/dist/research/branch-exploration.js +100 -0
  668. package/dist/research/branch-exploration.js.map +1 -0
  669. package/dist/research/brief.d.ts +5 -0
  670. package/dist/research/brief.d.ts.map +1 -0
  671. package/dist/research/brief.js +242 -0
  672. package/dist/research/brief.js.map +1 -0
  673. package/dist/research/citation-graph.d.ts +9 -0
  674. package/dist/research/citation-graph.d.ts.map +1 -0
  675. package/dist/research/citation-graph.js +114 -0
  676. package/dist/research/citation-graph.js.map +1 -0
  677. package/dist/research/decompose.d.ts +14 -0
  678. package/dist/research/decompose.d.ts.map +1 -0
  679. package/dist/research/decompose.js +439 -0
  680. package/dist/research/decompose.js.map +1 -0
  681. package/dist/research/pipeline.d.ts +5 -0
  682. package/dist/research/pipeline.d.ts.map +1 -0
  683. package/dist/research/pipeline.js +269 -0
  684. package/dist/research/pipeline.js.map +1 -0
  685. package/dist/research/synthesis-local.d.ts +16 -0
  686. package/dist/research/synthesis-local.d.ts.map +1 -0
  687. package/dist/research/synthesis-local.js +73 -0
  688. package/dist/research/synthesis-local.js.map +1 -0
  689. package/dist/research/synthesize.d.ts +10 -0
  690. package/dist/research/synthesize.d.ts.map +1 -0
  691. package/dist/research/synthesize.js +137 -0
  692. package/dist/research/synthesize.js.map +1 -0
  693. package/dist/search/answer-synthesis.d.ts +33 -0
  694. package/dist/search/answer-synthesis.d.ts.map +1 -0
  695. package/dist/search/answer-synthesis.js +244 -0
  696. package/dist/search/answer-synthesis.js.map +1 -0
  697. package/dist/search/context-formatter.d.ts +3 -0
  698. package/dist/search/context-formatter.d.ts.map +1 -0
  699. package/dist/search/context-formatter.js +56 -0
  700. package/dist/search/context-formatter.js.map +1 -0
  701. package/dist/search/dedup.d.ts +1 -0
  702. package/dist/search/dedup.d.ts.map +1 -1
  703. package/dist/search/dedup.js +40 -32
  704. package/dist/search/dedup.js.map +1 -1
  705. package/dist/search/engines/arxiv.d.ts +7 -0
  706. package/dist/search/engines/arxiv.d.ts.map +1 -0
  707. package/dist/search/engines/arxiv.js +70 -0
  708. package/dist/search/engines/arxiv.js.map +1 -0
  709. package/dist/search/engines/bing-news.d.ts +7 -0
  710. package/dist/search/engines/bing-news.d.ts.map +1 -0
  711. package/dist/search/engines/bing-news.js +97 -0
  712. package/dist/search/engines/bing-news.js.map +1 -0
  713. package/dist/search/engines/bing.d.ts +1 -0
  714. package/dist/search/engines/bing.d.ts.map +1 -1
  715. package/dist/search/engines/bing.js +100 -44
  716. package/dist/search/engines/bing.js.map +1 -1
  717. package/dist/search/engines/devdocs.d.ts +6 -0
  718. package/dist/search/engines/devdocs.d.ts.map +1 -0
  719. package/dist/search/engines/devdocs.js +56 -0
  720. package/dist/search/engines/devdocs.js.map +1 -0
  721. package/dist/search/engines/duckduckgo.d.ts.map +1 -1
  722. package/dist/search/engines/duckduckgo.js +56 -44
  723. package/dist/search/engines/duckduckgo.js.map +1 -1
  724. package/dist/search/engines/github-code.d.ts +7 -0
  725. package/dist/search/engines/github-code.d.ts.map +1 -0
  726. package/dist/search/engines/github-code.js +55 -0
  727. package/dist/search/engines/github-code.js.map +1 -0
  728. package/dist/search/engines/hn-algolia.d.ts +7 -0
  729. package/dist/search/engines/hn-algolia.d.ts.map +1 -0
  730. package/dist/search/engines/hn-algolia.js +76 -0
  731. package/dist/search/engines/hn-algolia.js.map +1 -0
  732. package/dist/search/engines/lobsters.d.ts +7 -0
  733. package/dist/search/engines/lobsters.d.ts.map +1 -0
  734. package/dist/search/engines/lobsters.js +83 -0
  735. package/dist/search/engines/lobsters.js.map +1 -0
  736. package/dist/search/engines/mdn.d.ts +7 -0
  737. package/dist/search/engines/mdn.d.ts.map +1 -0
  738. package/dist/search/engines/mdn.js +48 -0
  739. package/dist/search/engines/mdn.js.map +1 -0
  740. package/dist/search/engines/semantic-scholar.d.ts +7 -0
  741. package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
  742. package/dist/search/engines/semantic-scholar.js +69 -0
  743. package/dist/search/engines/semantic-scholar.js.map +1 -0
  744. package/dist/search/engines/stackoverflow.d.ts +7 -0
  745. package/dist/search/engines/stackoverflow.d.ts.map +1 -0
  746. package/dist/search/engines/stackoverflow.js +73 -0
  747. package/dist/search/engines/stackoverflow.js.map +1 -0
  748. package/dist/search/engines/startpage.d.ts.map +1 -1
  749. package/dist/search/engines/startpage.js +65 -46
  750. package/dist/search/engines/startpage.js.map +1 -1
  751. package/dist/search/evidence.d.ts +25 -0
  752. package/dist/search/evidence.d.ts.map +1 -0
  753. package/dist/search/evidence.js +220 -0
  754. package/dist/search/evidence.js.map +1 -0
  755. package/dist/search/filters.js +49 -55
  756. package/dist/search/filters.js.map +1 -1
  757. package/dist/search/find-similar/crawl-rank.d.ts +9 -0
  758. package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
  759. package/dist/search/find-similar/crawl-rank.js +272 -0
  760. package/dist/search/find-similar/crawl-rank.js.map +1 -0
  761. package/dist/search/find-similar/mode.d.ts +4 -0
  762. package/dist/search/find-similar/mode.d.ts.map +1 -0
  763. package/dist/search/find-similar/mode.js +12 -0
  764. package/dist/search/find-similar/mode.js.map +1 -0
  765. package/dist/search/find-similar.d.ts +5 -0
  766. package/dist/search/find-similar.d.ts.map +1 -0
  767. package/dist/search/find-similar.js +509 -0
  768. package/dist/search/find-similar.js.map +1 -0
  769. package/dist/search/highlights.d.ts +19 -0
  770. package/dist/search/highlights.d.ts.map +1 -0
  771. package/dist/search/highlights.js +167 -0
  772. package/dist/search/highlights.js.map +1 -0
  773. package/dist/search/language-filter.d.ts +29 -0
  774. package/dist/search/language-filter.d.ts.map +1 -0
  775. package/dist/search/language-filter.js +126 -0
  776. package/dist/search/language-filter.js.map +1 -0
  777. package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
  778. package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
  779. package/dist/search/legacy/searxng-orchestrator.js +501 -0
  780. package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
  781. package/dist/search/legacy/searxng-provider.d.ts +7 -0
  782. package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
  783. package/dist/search/legacy/searxng-provider.js +11 -0
  784. package/dist/search/legacy/searxng-provider.js.map +1 -0
  785. package/dist/search/multi-query.d.ts +25 -0
  786. package/dist/search/multi-query.d.ts.map +1 -0
  787. package/dist/search/multi-query.js +228 -0
  788. package/dist/search/multi-query.js.map +1 -0
  789. package/dist/search/query.js +32 -34
  790. package/dist/search/query.js.map +1 -1
  791. package/dist/search/rerank.d.ts +3 -1
  792. package/dist/search/rerank.d.ts.map +1 -1
  793. package/dist/search/rerank.js +44 -35
  794. package/dist/search/rerank.js.map +1 -1
  795. package/dist/search/reranker/authority-boost.d.ts +3 -0
  796. package/dist/search/reranker/authority-boost.d.ts.map +1 -0
  797. package/dist/search/reranker/authority-boost.js +179 -0
  798. package/dist/search/reranker/authority-boost.js.map +1 -0
  799. package/dist/search/reranker/consensus-boost.d.ts +3 -0
  800. package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
  801. package/dist/search/reranker/consensus-boost.js +27 -0
  802. package/dist/search/reranker/consensus-boost.js.map +1 -0
  803. package/dist/search/reranker/recency-boost.d.ts +3 -0
  804. package/dist/search/reranker/recency-boost.d.ts.map +1 -0
  805. package/dist/search/reranker/recency-boost.js +13 -0
  806. package/dist/search/reranker/recency-boost.js.map +1 -0
  807. package/dist/search/reranker/recency.d.ts +3 -0
  808. package/dist/search/reranker/recency.d.ts.map +1 -0
  809. package/dist/search/reranker/recency.js +23 -0
  810. package/dist/search/reranker/recency.js.map +1 -0
  811. package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
  812. package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
  813. package/dist/search/reranker/transformers-rerank-provider.js +78 -0
  814. package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
  815. package/dist/search/rrf.d.ts +17 -0
  816. package/dist/search/rrf.d.ts.map +1 -0
  817. package/dist/search/rrf.js +39 -0
  818. package/dist/search/rrf.js.map +1 -0
  819. package/dist/search/sampling.d.ts +25 -0
  820. package/dist/search/sampling.d.ts.map +1 -0
  821. package/dist/search/sampling.js +52 -0
  822. package/dist/search/sampling.js.map +1 -0
  823. package/dist/search/searxng.d.ts.map +1 -1
  824. package/dist/search/searxng.js +69 -79
  825. package/dist/search/searxng.js.map +1 -1
  826. package/dist/search/tokens.d.ts +3 -0
  827. package/dist/search/tokens.d.ts.map +1 -0
  828. package/dist/search/tokens.js +39 -0
  829. package/dist/search/tokens.js.map +1 -0
  830. package/dist/search/truncate.d.ts +6 -0
  831. package/dist/search/truncate.d.ts.map +1 -0
  832. package/dist/search/truncate.js +26 -0
  833. package/dist/search/truncate.js.map +1 -0
  834. package/dist/search/url-unwrap.d.ts +3 -0
  835. package/dist/search/url-unwrap.d.ts.map +1 -0
  836. package/dist/search/url-unwrap.js +43 -0
  837. package/dist/search/url-unwrap.js.map +1 -0
  838. package/dist/search/v1/context-rank.d.ts +13 -0
  839. package/dist/search/v1/context-rank.d.ts.map +1 -0
  840. package/dist/search/v1/context-rank.js +74 -0
  841. package/dist/search/v1/context-rank.js.map +1 -0
  842. package/dist/search/v1/engine-base.d.ts +27 -0
  843. package/dist/search/v1/engine-base.d.ts.map +1 -0
  844. package/dist/search/v1/engine-base.js +110 -0
  845. package/dist/search/v1/engine-base.js.map +1 -0
  846. package/dist/search/v1/intent-router.d.ts +22 -0
  847. package/dist/search/v1/intent-router.d.ts.map +1 -0
  848. package/dist/search/v1/intent-router.js +138 -0
  849. package/dist/search/v1/intent-router.js.map +1 -0
  850. package/dist/search/v1/orchestrator.d.ts +24 -0
  851. package/dist/search/v1/orchestrator.d.ts.map +1 -0
  852. package/dist/search/v1/orchestrator.js +163 -0
  853. package/dist/search/v1/orchestrator.js.map +1 -0
  854. package/dist/search/v1/recency-boost.d.ts +9 -0
  855. package/dist/search/v1/recency-boost.d.ts.map +1 -0
  856. package/dist/search/v1/recency-boost.js +37 -0
  857. package/dist/search/v1/recency-boost.js.map +1 -0
  858. package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
  859. package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
  860. package/dist/search/v1/recent-cache-dedup.js +85 -0
  861. package/dist/search/v1/recent-cache-dedup.js.map +1 -0
  862. package/dist/search/v1/rss/feed-config.d.ts +21 -0
  863. package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
  864. package/dist/search/v1/rss/feed-config.js +90 -0
  865. package/dist/search/v1/rss/feed-config.js.map +1 -0
  866. package/dist/search/v1/rss/feed-parser.d.ts +14 -0
  867. package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
  868. package/dist/search/v1/rss/feed-parser.js +104 -0
  869. package/dist/search/v1/rss/feed-parser.js.map +1 -0
  870. package/dist/search/v1/rss/feed-poller.d.ts +22 -0
  871. package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
  872. package/dist/search/v1/rss/feed-poller.js +102 -0
  873. package/dist/search/v1/rss/feed-poller.js.map +1 -0
  874. package/dist/search/v1/rss/feed-store.d.ts +30 -0
  875. package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
  876. package/dist/search/v1/rss/feed-store.js +134 -0
  877. package/dist/search/v1/rss/feed-store.js.map +1 -0
  878. package/dist/search/v1/rss/rss-engine.d.ts +6 -0
  879. package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
  880. package/dist/search/v1/rss/rss-engine.js +28 -0
  881. package/dist/search/v1/rss/rss-engine.js.map +1 -0
  882. package/dist/search/v1/v1-provider.d.ts +7 -0
  883. package/dist/search/v1/v1-provider.d.ts.map +1 -0
  884. package/dist/search/v1/v1-provider.js +68 -0
  885. package/dist/search/v1/v1-provider.js.map +1 -0
  886. package/dist/search/v1/verticals/code.d.ts +4 -0
  887. package/dist/search/v1/verticals/code.d.ts.map +1 -0
  888. package/dist/search/v1/verticals/code.js +20 -0
  889. package/dist/search/v1/verticals/code.js.map +1 -0
  890. package/dist/search/v1/verticals/docs.d.ts +4 -0
  891. package/dist/search/v1/verticals/docs.d.ts.map +1 -0
  892. package/dist/search/v1/verticals/docs.js +20 -0
  893. package/dist/search/v1/verticals/docs.js.map +1 -0
  894. package/dist/search/v1/verticals/general.d.ts +4 -0
  895. package/dist/search/v1/verticals/general.d.ts.map +1 -0
  896. package/dist/search/v1/verticals/general.js +22 -0
  897. package/dist/search/v1/verticals/general.js.map +1 -0
  898. package/dist/search/v1/verticals/news.d.ts +10 -0
  899. package/dist/search/v1/verticals/news.d.ts.map +1 -0
  900. package/dist/search/v1/verticals/news.js +52 -0
  901. package/dist/search/v1/verticals/news.js.map +1 -0
  902. package/dist/search/v1/verticals/papers.d.ts +4 -0
  903. package/dist/search/v1/verticals/papers.d.ts.map +1 -0
  904. package/dist/search/v1/verticals/papers.js +23 -0
  905. package/dist/search/v1/verticals/papers.js.map +1 -0
  906. package/dist/search/validator.js +31 -31
  907. package/dist/search/validator.js.map +1 -1
  908. package/dist/searxng/bootstrap.d.ts +30 -0
  909. package/dist/searxng/bootstrap.d.ts.map +1 -1
  910. package/dist/searxng/bootstrap.js +223 -85
  911. package/dist/searxng/bootstrap.js.map +1 -1
  912. package/dist/searxng/docker.d.ts.map +1 -1
  913. package/dist/searxng/docker.js +69 -60
  914. package/dist/searxng/docker.js.map +1 -1
  915. package/dist/searxng/process.d.ts +13 -1
  916. package/dist/searxng/process.d.ts.map +1 -1
  917. package/dist/searxng/process.js +231 -164
  918. package/dist/searxng/process.js.map +1 -1
  919. package/dist/server/backend-status.d.ts +13 -0
  920. package/dist/server/backend-status.d.ts.map +1 -0
  921. package/dist/server/backend-status.js +40 -0
  922. package/dist/server/backend-status.js.map +1 -0
  923. package/dist/server/tool-schemas.d.ts +549 -0
  924. package/dist/server/tool-schemas.d.ts.map +1 -0
  925. package/dist/server/tool-schemas.js +464 -0
  926. package/dist/server/tool-schemas.js.map +1 -0
  927. package/dist/server/warmup-on-start.d.ts +9 -0
  928. package/dist/server/warmup-on-start.d.ts.map +1 -0
  929. package/dist/server/warmup-on-start.js +55 -0
  930. package/dist/server/warmup-on-start.js.map +1 -0
  931. package/dist/server.d.ts +17 -0
  932. package/dist/server.d.ts.map +1 -1
  933. package/dist/server.js +454 -297
  934. package/dist/server.js.map +1 -1
  935. package/dist/tools/agent.d.ts +5 -0
  936. package/dist/tools/agent.d.ts.map +1 -0
  937. package/dist/tools/agent.js +128 -0
  938. package/dist/tools/agent.js.map +1 -0
  939. package/dist/tools/cache.d.ts +2 -1
  940. package/dist/tools/cache.d.ts.map +1 -1
  941. package/dist/tools/cache.js +175 -44
  942. package/dist/tools/cache.js.map +1 -1
  943. package/dist/tools/crawl.d.ts.map +1 -1
  944. package/dist/tools/crawl.js +171 -88
  945. package/dist/tools/crawl.js.map +1 -1
  946. package/dist/tools/extract.d.ts +2 -2
  947. package/dist/tools/extract.d.ts.map +1 -1
  948. package/dist/tools/extract.js +175 -59
  949. package/dist/tools/extract.js.map +1 -1
  950. package/dist/tools/fetch.d.ts +2 -2
  951. package/dist/tools/fetch.d.ts.map +1 -1
  952. package/dist/tools/fetch.js +161 -68
  953. package/dist/tools/fetch.js.map +1 -1
  954. package/dist/tools/find-similar.d.ts +5 -0
  955. package/dist/tools/find-similar.d.ts.map +1 -0
  956. package/dist/tools/find-similar.js +127 -0
  957. package/dist/tools/find-similar.js.map +1 -0
  958. package/dist/tools/research.d.ts +5 -0
  959. package/dist/tools/research.d.ts.map +1 -0
  960. package/dist/tools/research.js +107 -0
  961. package/dist/tools/research.js.map +1 -0
  962. package/dist/tools/search.d.ts +10 -2
  963. package/dist/tools/search.d.ts.map +1 -1
  964. package/dist/tools/search.js +13 -158
  965. package/dist/tools/search.js.map +1 -1
  966. package/dist/types.d.ts +350 -7
  967. package/dist/types.d.ts.map +1 -1
  968. package/dist/types.js +6 -1
  969. package/dist/types.js.map +1 -1
  970. package/dist/util/mode.d.ts +4 -0
  971. package/dist/util/mode.d.ts.map +1 -0
  972. package/dist/util/mode.js +34 -0
  973. package/dist/util/mode.js.map +1 -0
  974. package/package.json +78 -8
  975. package/dist/extraction/trafilatura.d.ts +0 -6
  976. package/dist/extraction/trafilatura.d.ts.map +0 -1
  977. package/dist/extraction/trafilatura.js +0 -105
  978. package/dist/extraction/trafilatura.js.map +0 -1
  979. package/dist/search/flashrank.d.ts +0 -12
  980. package/dist/search/flashrank.d.ts.map +0 -1
  981. package/dist/search/flashrank.js +0 -63
  982. package/dist/search/flashrank.js.map +0 -1
@@ -0,0 +1,52 @@
1
+ const BOILERPLATE_TEXT_EQUALITY = [
2
+ "was this helpful?",
3
+ "send",
4
+ "edit this page",
5
+ "edit on github",
6
+ "suggest changes",
7
+ "skip to main content",
8
+ "on this page"
9
+ ];
10
+ const BOILERPLATE_TEXT_PATTERNS = [
11
+ /^\s*last updated on .+$/i
12
+ ];
13
+ const BOILERPLATE_SELECTORS = [
14
+ '[class*="feedback"]',
15
+ '[class*="edit-page"]',
16
+ '[aria-label*="Edit"]',
17
+ 'footer[class*="docs"]',
18
+ '[class*="sticky-cta"]',
19
+ 'main [role="banner"]',
20
+ '[role="navigation"]',
21
+ '[class*="sidebar"]',
22
+ '[data-collection="docs"]'
23
+ ];
24
+ function stripBoilerplateMarkdown(md) {
25
+ if (!md) return md;
26
+ const lines = md.split("\n");
27
+ const kept = lines.filter((line) => {
28
+ const t = line.trim().toLowerCase();
29
+ if (!t) return true;
30
+ if (BOILERPLATE_TEXT_EQUALITY.includes(t)) return false;
31
+ return !BOILERPLATE_TEXT_PATTERNS.some((re) => re.test(line));
32
+ });
33
+ return kept.join("\n").replace(/\n{3,}/g, "\n\n");
34
+ }
35
+ function stripBoilerplateDom(document) {
36
+ for (const sel of BOILERPLATE_SELECTORS) {
37
+ const nodes = document.querySelectorAll(sel);
38
+ const list = [];
39
+ for (let i = 0; i < nodes.length; i++) list.push(nodes[i]);
40
+ for (const el of list) {
41
+ el.parentNode?.removeChild(el);
42
+ }
43
+ }
44
+ }
45
+ export {
46
+ BOILERPLATE_SELECTORS,
47
+ BOILERPLATE_TEXT_EQUALITY,
48
+ BOILERPLATE_TEXT_PATTERNS,
49
+ stripBoilerplateDom,
50
+ stripBoilerplateMarkdown
51
+ };
52
+ //# sourceMappingURL=boilerplate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/extraction/boilerplate.ts"],"sourcesContent":["export const BOILERPLATE_TEXT_EQUALITY: ReadonlyArray<string> = [\n 'was this helpful?',\n 'send',\n 'edit this page',\n 'edit on github',\n 'suggest changes',\n 'skip to main content',\n 'on this page',\n];\n\nexport const BOILERPLATE_TEXT_PATTERNS: ReadonlyArray<RegExp> = [\n /^\\s*last updated on .+$/i,\n];\n\nexport const BOILERPLATE_SELECTORS: ReadonlyArray<string> = [\n '[class*=\"feedback\"]',\n '[class*=\"edit-page\"]',\n '[aria-label*=\"Edit\"]',\n 'footer[class*=\"docs\"]',\n '[class*=\"sticky-cta\"]',\n 'main [role=\"banner\"]',\n '[role=\"navigation\"]',\n '[class*=\"sidebar\"]',\n '[data-collection=\"docs\"]',\n];\n\nexport interface BoilerplateDocument {\n querySelectorAll(selector: string): ArrayLike<BoilerplateElement>;\n}\n\ninterface BoilerplateElement {\n parentNode: { removeChild(child: BoilerplateElement): void } | null;\n}\n\nexport function stripBoilerplateMarkdown(md: string): string {\n if (!md) return md;\n const lines = md.split('\\n');\n const kept = lines.filter((line) => {\n const t = line.trim().toLowerCase();\n if (!t) return true;\n if (BOILERPLATE_TEXT_EQUALITY.includes(t)) return false;\n return !BOILERPLATE_TEXT_PATTERNS.some((re) => re.test(line));\n });\n return kept.join('\\n').replace(/\\n{3,}/g, '\\n\\n');\n}\n\nexport function stripBoilerplateDom(document: BoilerplateDocument): void {\n for (const sel of BOILERPLATE_SELECTORS) {\n const nodes = document.querySelectorAll(sel);\n const list: BoilerplateElement[] = [];\n for (let i = 0; i < nodes.length; i++) list.push(nodes[i]);\n for (const el of list) {\n el.parentNode?.removeChild(el);\n }\n 
}\n}\n"],"mappings":"AAAO,MAAM,4BAAmD;AAAA,EAC9D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,MAAM,4BAAmD;AAAA,EAC9D;AACF;AAEO,MAAM,wBAA+C;AAAA,EAC1D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAUO,SAAS,yBAAyB,IAAoB;AAC3D,MAAI,CAAC,GAAI,QAAO;AAChB,QAAM,QAAQ,GAAG,MAAM,IAAI;AAC3B,QAAM,OAAO,MAAM,OAAO,CAAC,SAAS;AAClC,UAAM,IAAI,KAAK,KAAK,EAAE,YAAY;AAClC,QAAI,CAAC,EAAG,QAAO;AACf,QAAI,0BAA0B,SAAS,CAAC,EAAG,QAAO;AAClD,WAAO,CAAC,0BAA0B,KAAK,CAAC,OAAO,GAAG,KAAK,IAAI,CAAC;AAAA,EAC9D,CAAC;AACD,SAAO,KAAK,KAAK,IAAI,EAAE,QAAQ,WAAW,MAAM;AAClD;AAEO,SAAS,oBAAoB,UAAqC;AACvE,aAAW,OAAO,uBAAuB;AACvC,UAAM,QAAQ,SAAS,iBAAiB,GAAG;AAC3C,UAAM,OAA6B,CAAC;AACpC,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,IAAK,MAAK,KAAK,MAAM,CAAC,CAAC;AACzD,eAAW,MAAM,MAAM;AACrB,SAAG,YAAY,YAAY,EAAE;AAAA,IAC/B;AAAA,EACF;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"defuddle.d.ts","sourceRoot":"","sources":["../../src/extraction/defuddle.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAoBjG"}
1
+ {"version":3,"file":"defuddle.d.ts","sourceRoot":"","sources":["../../src/extraction/defuddle.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAKpD,wBAAsB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAsBjG"}
@@ -1,26 +1,30 @@
1
- import { Defuddle } from 'defuddle/node';
1
+ import { Defuddle } from "defuddle/node";
2
+ import { htmlToMarkdown } from "./markdown.js";
2
3
  const MIN_CONTENT_THRESHOLD = 100;
3
- export async function defuddleExtract(html, url) {
4
- try {
5
- const result = await Defuddle(html, url, { markdown: true });
6
- if (!result.content || result.content.length < MIN_CONTENT_THRESHOLD)
7
- return null;
8
- return {
9
- title: result.title ?? '',
10
- markdown: result.content,
11
- metadata: {
12
- description: result.description || undefined,
13
- author: result.author || undefined,
14
- date: result.published || undefined,
15
- language: result.language || undefined,
16
- },
17
- links: [],
18
- images: [],
19
- extractor: 'defuddle',
20
- };
21
- }
22
- catch {
23
- return null;
24
- }
4
+ async function defuddleExtract(html, url) {
5
+ try {
6
+ const result = await Defuddle(html, url);
7
+ if (!result.content) return null;
8
+ const markdown = htmlToMarkdown(result.content);
9
+ if (markdown.length < MIN_CONTENT_THRESHOLD) return null;
10
+ return {
11
+ title: result.title ?? "",
12
+ markdown,
13
+ metadata: {
14
+ description: result.description || void 0,
15
+ author: result.author || void 0,
16
+ date: result.published || void 0,
17
+ language: result.language || void 0
18
+ },
19
+ links: [],
20
+ images: [],
21
+ extractor: "defuddle"
22
+ };
23
+ } catch {
24
+ return null;
25
+ }
25
26
  }
27
+ export {
28
+ defuddleExtract
29
+ };
26
30
  //# sourceMappingURL=defuddle.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"defuddle.js","sourceRoot":"","sources":["../../src/extraction/defuddle.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAGzC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAY,EAAE,GAAW;IAC7D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAClF,OAAO;YACL,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;YACzB,QAAQ,EAAE,MAAM,CAAC,OAAO;YACxB,QAAQ,EAAE;gBACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,SAAS;gBAC5C,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,SAAS;gBAClC,IAAI,EAAE,MAAM,CAAC,SAAS,IAAI,SAAS;gBACnC,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,SAAS;aACvC;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
1
+ {"version":3,"sources":["../../src/extraction/defuddle.ts"],"sourcesContent":["import { Defuddle } from 'defuddle/node';\nimport type { ExtractionResult } from '../types.js';\nimport { htmlToMarkdown } from './markdown.js';\n\nconst MIN_CONTENT_THRESHOLD = 100;\n\nexport async function defuddleExtract(html: string, url: string): Promise<ExtractionResult | null> {\n try {\n const result = await Defuddle(html, url);\n if (!result.content) return null;\n const markdown = htmlToMarkdown(result.content);\n if (markdown.length < MIN_CONTENT_THRESHOLD) return null;\n return {\n title: result.title ?? '',\n markdown,\n metadata: {\n description: result.description || undefined,\n author: result.author || undefined,\n date: result.published || undefined,\n language: result.language || undefined,\n },\n links: [],\n images: [],\n extractor: 'defuddle',\n };\n } catch {\n return null;\n }\n}\n"],"mappings":"AAAA,SAAS,gBAAgB;AAEzB,SAAS,sBAAsB;AAE/B,MAAM,wBAAwB;AAE9B,eAAsB,gBAAgB,MAAc,KAA+C;AACjG,MAAI;AACF,UAAM,SAAS,MAAM,SAAS,MAAM,GAAG;AACvC,QAAI,CAAC,OAAO,QAAS,QAAO;AAC5B,UAAM,WAAW,eAAe,OAAO,OAAO;AAC9C,QAAI,SAAS,SAAS,sBAAuB,QAAO;AACpD,WAAO;AAAA,MACL,OAAO,OAAO,SAAS;AAAA,MACvB;AAAA,MACA,UAAU;AAAA,QACR,aAAa,OAAO,eAAe;AAAA,QACnC,QAAQ,OAAO,UAAU;AAAA,QACzB,MAAM,OAAO,aAAa;AAAA,QAC1B,UAAU,OAAO,YAAY;AAAA,MAC/B;AAAA,MACA,OAAO,CAAC;AAAA,MACR,QAAQ,CAAC;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF,QAAQ;AACN,WAAO;AAAA,EACT;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CA2B1D;AAED,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,GAChB,MAAM,GAAG,MAAM,EAAE,CAUnB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CA6CvD"}
1
+ {"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAiC1D;AAED,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,GAChB,MAAM,GAAG,MAAM,EAAE,CAUnB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CA6CvD"}
@@ -1,83 +1,83 @@
1
- import { parseHTML } from 'linkedom';
1
+ import { parseHTML } from "linkedom";
2
2
  function getMetaContent(doc, nameOrProperty) {
3
- const el = doc.querySelector(`meta[name="${nameOrProperty}"]`) ??
4
- doc.querySelector(`meta[property="${nameOrProperty}"]`);
5
- return el?.getAttribute('content') ?? undefined;
3
+ const el = doc.querySelector(`meta[name="${nameOrProperty}"]`) ?? doc.querySelector(`meta[property="${nameOrProperty}"]`);
4
+ return el?.getAttribute("content") ?? void 0;
6
5
  }
7
- export function extractMetadata(html) {
8
- const { document: doc } = parseHTML(html);
9
- const result = {};
10
- const title = doc.querySelector('title')?.textContent?.trim();
11
- if (title)
12
- result.title = title;
13
- const description = getMetaContent(doc, 'description') ?? getMetaContent(doc, 'og:description');
14
- if (description)
15
- result.description = description;
16
- const author = getMetaContent(doc, 'author');
17
- if (author)
18
- result.author = author;
19
- const date = getMetaContent(doc, 'date') ?? getMetaContent(doc, 'article:published_time');
20
- if (date)
21
- result.date = date;
22
- const keywords = getMetaContent(doc, 'keywords');
23
- if (keywords) {
24
- result.keywords = keywords.split(',').map((k) => k.trim()).filter(Boolean);
25
- }
26
- const ogImage = getMetaContent(doc, 'og:image');
27
- if (ogImage)
28
- result.og_image = ogImage;
29
- return result;
6
+ function extractMetadata(html) {
7
+ const { document: doc } = parseHTML(html);
8
+ const result = {};
9
+ const title = doc.querySelector("title")?.textContent?.trim();
10
+ if (title) result.title = title;
11
+ const description = getMetaContent(doc, "description") ?? getMetaContent(doc, "og:description");
12
+ if (description) result.description = description;
13
+ const author = getMetaContent(doc, "author");
14
+ if (author) result.author = author;
15
+ const date = getMetaContent(doc, "date") ?? getMetaContent(doc, "article:published_time");
16
+ if (date) result.date = date;
17
+ const keywords = getMetaContent(doc, "keywords");
18
+ if (keywords) {
19
+ result.keywords = keywords.split(",").map((k) => k.trim()).filter(Boolean);
20
+ }
21
+ const ogImage = getMetaContent(doc, "og:image");
22
+ if (ogImage) result.og_image = ogImage;
23
+ const ogType = getMetaContent(doc, "og:type");
24
+ if (ogType) result.og_type = ogType;
25
+ const canonical = doc.querySelector('link[rel="canonical"]')?.getAttribute("href");
26
+ if (canonical) result.canonical_url = canonical;
27
+ return result;
30
28
  }
31
- export function extractSelector(html, selector, multiple) {
32
- const { document: doc } = parseHTML(html);
33
- if (multiple) {
34
- const elements = doc.querySelectorAll(selector);
35
- return Array.from(elements).map((el) => (el.textContent ?? '').trim());
36
- }
37
- const el = doc.querySelector(selector);
38
- return el ? (el.textContent ?? '').trim() : '';
29
+ function extractSelector(html, selector, multiple) {
30
+ const { document: doc } = parseHTML(html);
31
+ if (multiple) {
32
+ const elements = doc.querySelectorAll(selector);
33
+ return Array.from(elements).map((el2) => (el2.textContent ?? "").trim());
34
+ }
35
+ const el = doc.querySelector(selector);
36
+ return el ? (el.textContent ?? "").trim() : "";
39
37
  }
40
- export function extractTables(html) {
41
- const { document: doc } = parseHTML(html);
42
- const tables = doc.querySelectorAll('table');
43
- if (tables.length === 0)
44
- return [];
45
- return Array.from(tables).map((table) => {
46
- const caption = table.querySelector('caption')?.textContent?.trim() || undefined;
47
- const thElements = table.querySelectorAll('thead th');
48
- let headers;
49
- let bodyRows;
50
- if (thElements.length > 0) {
51
- headers = Array.from(thElements).map((th) => (th.textContent ?? '').trim());
52
- bodyRows = Array.from(table.querySelectorAll('tbody tr'));
53
- if (bodyRows.length === 0) {
54
- const allRows = Array.from(table.querySelectorAll('tr'));
55
- bodyRows = allRows.slice(1);
56
- }
57
- }
58
- else {
59
- const allRows = Array.from(table.querySelectorAll('tr'));
60
- const firstRow = allRows[0];
61
- const firstRowThs = firstRow ? Array.from(firstRow.querySelectorAll('th')) : [];
62
- if (firstRowThs.length > 0) {
63
- headers = firstRowThs.map((th) => (th.textContent ?? '').trim());
64
- bodyRows = allRows.slice(1);
65
- }
66
- else {
67
- const cellCount = firstRow ? firstRow.querySelectorAll('td').length : 0;
68
- headers = Array.from({ length: cellCount }, (_, i) => `col_${i + 1}`);
69
- bodyRows = allRows;
70
- }
71
- }
72
- const rows = bodyRows.map((row) => {
73
- const cells = Array.from(row.querySelectorAll('td'));
74
- const obj = {};
75
- headers.forEach((header, i) => {
76
- obj[header] = (cells[i]?.textContent ?? '').trim();
77
- });
78
- return obj;
79
- });
80
- return { caption, headers, rows };
38
+ function extractTables(html) {
39
+ const { document: doc } = parseHTML(html);
40
+ const tables = doc.querySelectorAll("table");
41
+ if (tables.length === 0) return [];
42
+ return Array.from(tables).map((table) => {
43
+ const caption = table.querySelector("caption")?.textContent?.trim() || void 0;
44
+ const thElements = table.querySelectorAll("thead th");
45
+ let headers;
46
+ let bodyRows;
47
+ if (thElements.length > 0) {
48
+ headers = Array.from(thElements).map((th) => (th.textContent ?? "").trim());
49
+ bodyRows = Array.from(table.querySelectorAll("tbody tr"));
50
+ if (bodyRows.length === 0) {
51
+ const allRows = Array.from(table.querySelectorAll("tr"));
52
+ bodyRows = allRows.slice(1);
53
+ }
54
+ } else {
55
+ const allRows = Array.from(table.querySelectorAll("tr"));
56
+ const firstRow = allRows[0];
57
+ const firstRowThs = firstRow ? Array.from(firstRow.querySelectorAll("th")) : [];
58
+ if (firstRowThs.length > 0) {
59
+ headers = firstRowThs.map((th) => (th.textContent ?? "").trim());
60
+ bodyRows = allRows.slice(1);
61
+ } else {
62
+ const cellCount = firstRow ? firstRow.querySelectorAll("td").length : 0;
63
+ headers = Array.from({ length: cellCount }, (_, i) => `col_${i + 1}`);
64
+ bodyRows = allRows;
65
+ }
66
+ }
67
+ const rows = bodyRows.map((row) => {
68
+ const cells = Array.from(row.querySelectorAll("td"));
69
+ const obj = {};
70
+ headers.forEach((header, i) => {
71
+ obj[header] = (cells[i]?.textContent ?? "").trim();
72
+ });
73
+ return obj;
81
74
  });
75
+ return { caption, headers, rows };
76
+ });
82
77
  }
78
+ export {
79
+ extractMetadata,
80
+ extractSelector,
81
+ extractTables
82
+ };
83
83
  //# sourceMappingURL=extract.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,SAAS,cAAc,CAAC,GAAa,EAAE,cAAsB;IAC3D,MAAM,EAAE,GACN,GAAG,CAAC,aAAa,CAAC,cAAc,cAAc,IAAI,CAAC;QACnD,GAAG,CAAC,aAAa,CAAC,kBAAkB,cAAc,IAAI,CAAC,CAAC;IAC1D,OAAO,EAAE,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC9D,IAAI,KAAK;QAAE,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IAEhC,MAAM,WAAW,GACf,cAAc,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IAC9E,IAAI,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC;IAElD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;IAEnC,MAAM,IAAI,GACR,cAAc,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;IAC/E,IAAI,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC;IAEvC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,QAAgB,EAChB,QAAiB;IAEjB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,MAAM,EAAE,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAA
G,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAEjF,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,OAAiB,CAAC;QACtB,IAAI,QAAmB,CAAC;QAExB,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5E,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxE,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtE,QAAQ,GAAG,OAAO,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACrD,MAAM,GAAG,GAA2B,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,EAAE,CAA
C,CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC"}
1
+ {"version":3,"sources":["../../src/extraction/extract.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport type { MetadataData, TableData } from '../types.js';\n\nfunction getMetaContent(doc: Document, nameOrProperty: string): string | undefined {\n const el =\n doc.querySelector(`meta[name=\"${nameOrProperty}\"]`) ??\n doc.querySelector(`meta[property=\"${nameOrProperty}\"]`);\n return el?.getAttribute('content') ?? undefined;\n}\n\nexport function extractMetadata(html: string): MetadataData {\n const { document: doc } = parseHTML(html);\n const result: MetadataData = {};\n\n const title = doc.querySelector('title')?.textContent?.trim();\n if (title) result.title = title;\n\n const description =\n getMetaContent(doc, 'description') ?? getMetaContent(doc, 'og:description');\n if (description) result.description = description;\n\n const author = getMetaContent(doc, 'author');\n if (author) result.author = author;\n\n const date =\n getMetaContent(doc, 'date') ?? getMetaContent(doc, 'article:published_time');\n if (date) result.date = date;\n\n const keywords = getMetaContent(doc, 'keywords');\n if (keywords) {\n result.keywords = keywords.split(',').map((k) => k.trim()).filter(Boolean);\n }\n\n const ogImage = getMetaContent(doc, 'og:image');\n if (ogImage) result.og_image = ogImage;\n\n const ogType = getMetaContent(doc, 'og:type');\n if (ogType) result.og_type = ogType;\n\n const canonical = doc.querySelector('link[rel=\"canonical\"]')?.getAttribute('href');\n if (canonical) result.canonical_url = canonical;\n\n return result;\n}\n\nexport function extractSelector(\n html: string,\n selector: string,\n multiple: boolean,\n): string | string[] {\n const { document: doc } = parseHTML(html);\n\n if (multiple) {\n const elements = doc.querySelectorAll(selector);\n return Array.from(elements).map((el) => (el.textContent ?? '').trim());\n }\n\n const el = doc.querySelector(selector);\n return el ? (el.textContent ?? 
'').trim() : '';\n}\n\nexport function extractTables(html: string): TableData[] {\n const { document: doc } = parseHTML(html);\n const tables = doc.querySelectorAll('table');\n if (tables.length === 0) return [];\n\n return Array.from(tables).map((table) => {\n const caption = table.querySelector('caption')?.textContent?.trim() || undefined;\n\n const thElements = table.querySelectorAll('thead th');\n let headers: string[];\n let bodyRows: Element[];\n\n if (thElements.length > 0) {\n headers = Array.from(thElements).map((th) => (th.textContent ?? '').trim());\n bodyRows = Array.from(table.querySelectorAll('tbody tr'));\n if (bodyRows.length === 0) {\n const allRows = Array.from(table.querySelectorAll('tr'));\n bodyRows = allRows.slice(1);\n }\n } else {\n const allRows = Array.from(table.querySelectorAll('tr'));\n const firstRow = allRows[0];\n const firstRowThs = firstRow ? Array.from(firstRow.querySelectorAll('th')) : [];\n\n if (firstRowThs.length > 0) {\n headers = firstRowThs.map((th) => (th.textContent ?? '').trim());\n bodyRows = allRows.slice(1);\n } else {\n const cellCount = firstRow ? firstRow.querySelectorAll('td').length : 0;\n headers = Array.from({ length: cellCount }, (_, i) => `col_${i + 1}`);\n bodyRows = allRows;\n }\n }\n\n const rows = bodyRows.map((row) => {\n const cells = Array.from(row.querySelectorAll('td'));\n const obj: Record<string, string> = {};\n headers.forEach((header, i) => {\n obj[header] = (cells[i]?.textContent ?? 
'').trim();\n });\n return obj;\n });\n\n return { caption, headers, rows };\n });\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAG1B,SAAS,eAAe,KAAe,gBAA4C;AACjF,QAAM,KACJ,IAAI,cAAc,cAAc,cAAc,IAAI,KAClD,IAAI,cAAc,kBAAkB,cAAc,IAAI;AACxD,SAAO,IAAI,aAAa,SAAS,KAAK;AACxC;AAEO,SAAS,gBAAgB,MAA4B;AAC1D,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,QAAM,SAAuB,CAAC;AAE9B,QAAM,QAAQ,IAAI,cAAc,OAAO,GAAG,aAAa,KAAK;AAC5D,MAAI,MAAO,QAAO,QAAQ;AAE1B,QAAM,cACJ,eAAe,KAAK,aAAa,KAAK,eAAe,KAAK,gBAAgB;AAC5E,MAAI,YAAa,QAAO,cAAc;AAEtC,QAAM,SAAS,eAAe,KAAK,QAAQ;AAC3C,MAAI,OAAQ,QAAO,SAAS;AAE5B,QAAM,OACJ,eAAe,KAAK,MAAM,KAAK,eAAe,KAAK,wBAAwB;AAC7E,MAAI,KAAM,QAAO,OAAO;AAExB,QAAM,WAAW,eAAe,KAAK,UAAU;AAC/C,MAAI,UAAU;AACZ,WAAO,WAAW,SAAS,MAAM,GAAG,EAAE,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,OAAO,OAAO;AAAA,EAC3E;AAEA,QAAM,UAAU,eAAe,KAAK,UAAU;AAC9C,MAAI,QAAS,QAAO,WAAW;AAE/B,QAAM,SAAS,eAAe,KAAK,SAAS;AAC5C,MAAI,OAAQ,QAAO,UAAU;AAE7B,QAAM,YAAY,IAAI,cAAc,uBAAuB,GAAG,aAAa,MAAM;AACjF,MAAI,UAAW,QAAO,gBAAgB;AAEtC,SAAO;AACT;AAEO,SAAS,gBACd,MACA,UACA,UACmB;AACnB,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AAExC,MAAI,UAAU;AACZ,UAAM,WAAW,IAAI,iBAAiB,QAAQ;AAC9C,WAAO,MAAM,KAAK,QAAQ,EAAE,IAAI,CAACA,SAAQA,IAAG,eAAe,IAAI,KAAK,CAAC;AAAA,EACvE;AAEA,QAAM,KAAK,IAAI,cAAc,QAAQ;AACrC,SAAO,MAAM,GAAG,eAAe,IAAI,KAAK,IAAI;AAC9C;AAEO,SAAS,cAAc,MAA2B;AACvD,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,QAAM,SAAS,IAAI,iBAAiB,OAAO;AAC3C,MAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AAEjC,SAAO,MAAM,KAAK,MAAM,EAAE,IAAI,CAAC,UAAU;AACvC,UAAM,UAAU,MAAM,cAAc,SAAS,GAAG,aAAa,KAAK,KAAK;AAEvE,UAAM,aAAa,MAAM,iBAAiB,UAAU;AACpD,QAAI;AACJ,QAAI;AAEJ,QAAI,WAAW,SAAS,GAAG;AACzB,gBAAU,MAAM,KAAK,UAAU,EAAE,IAAI,CAAC,QAAQ,GAAG,eAAe,IAAI,KAAK,CAAC;AAC1E,iBAAW,MAAM,KAAK,MAAM,iBAAiB,UAAU,CAAC;AACxD,UAAI,SAAS,WAAW,GAAG;AACzB,cAAM,UAAU,MAAM,KAAK,MAAM,iBAAiB,IAAI,CAAC;AACvD,mBAAW,QAAQ,MAAM,CAAC;AAAA,MAC5B;AAAA,IACF,OAAO;AACL,YAAM,UAAU,MAAM,KAAK,MAAM,iBAAiB,IAAI,CAAC;AACvD,YAAM,WAAW,QAAQ,CAAC;AAC1B,YAAM,cAAc,WAAW,MAAM,KAAK,SAAS,iBAAiB,IAAI,CAAC,IAAI,CAAC;AAE9E,UAAI,YAAY,SAAS,GAAG;AAC1B,kBAAU,YAAY,IAAI,CAAC,QAAQ,
GAAG,eAAe,IAAI,KAAK,CAAC;AAC/D,mBAAW,QAAQ,MAAM,CAAC;AAAA,MAC5B,OAAO;AACL,cAAM,YAAY,WAAW,SAAS,iBAAiB,IAAI,EAAE,SAAS;AACtE,kBAAU,MAAM,KAAK,EAAE,QAAQ,UAAU,GAAG,CAAC,GAAG,MAAM,OAAO,IAAI,CAAC,EAAE;AACpE,mBAAW;AAAA,MACb;AAAA,IACF;AAEA,UAAM,OAAO,SAAS,IAAI,CAAC,QAAQ;AACjC,YAAM,QAAQ,MAAM,KAAK,IAAI,iBAAiB,IAAI,CAAC;AACnD,YAAM,MAA8B,CAAC;AACrC,cAAQ,QAAQ,CAAC,QAAQ,MAAM;AAC7B,YAAI,MAAM,KAAK,MAAM,CAAC,GAAG,eAAe,IAAI,KAAK;AAAA,MACnD,CAAC;AACD,aAAO;AAAA,IACT,CAAC;AAED,WAAO,EAAE,SAAS,SAAS,KAAK;AAAA,EAClC,CAAC;AACH;","names":["el"]}
@@ -1,64 +1,60 @@
1
- import { parseHTML } from 'linkedom';
2
- import { createLogger } from '../logger.js';
3
- const log = createLogger('jsonld');
4
- export function extractJsonLd(html) {
5
- const { document: doc } = parseHTML(html);
6
- const scripts = doc.querySelectorAll('script[type="application/ld+json"]');
7
- const results = [];
8
- for (const script of scripts) {
9
- try {
10
- const text = script.textContent?.trim();
11
- if (!text)
12
- continue;
13
- const parsed = JSON.parse(text);
14
- if (Array.isArray(parsed)) {
15
- results.push(...parsed);
16
- }
17
- else if (parsed['@graph'] && Array.isArray(parsed['@graph'])) {
18
- results.push(...parsed['@graph']);
19
- }
20
- else {
21
- results.push(parsed);
22
- }
23
- }
24
- catch (err) {
25
- log.debug('Failed to parse JSON-LD block', { error: String(err) });
26
- }
1
+ import { parseHTML } from "linkedom";
2
+ import { createLogger } from "../logger.js";
3
+ const log = createLogger("jsonld");
4
+ function extractJsonLd(html) {
5
+ const { document: doc } = parseHTML(html);
6
+ const scripts = doc.querySelectorAll('script[type="application/ld+json"]');
7
+ const results = [];
8
+ for (const script of scripts) {
9
+ try {
10
+ const text = script.textContent?.trim();
11
+ if (!text) continue;
12
+ const parsed = JSON.parse(text);
13
+ if (Array.isArray(parsed)) {
14
+ results.push(...parsed);
15
+ } else if (parsed["@graph"] && Array.isArray(parsed["@graph"])) {
16
+ results.push(...parsed["@graph"]);
17
+ } else {
18
+ results.push(parsed);
19
+ }
20
+ } catch (err) {
21
+ log.warn("Failed to parse JSON-LD block", { error: String(err) });
27
22
  }
28
- return results;
23
+ }
24
+ return results;
29
25
  }
30
- export function matchJsonLdToSchema(jsonLdBlocks, schema) {
31
- if (!schema.properties || jsonLdBlocks.length === 0)
32
- return {};
33
- const result = {};
34
- const flattened = flattenJsonLd(jsonLdBlocks);
35
- for (const fieldName of Object.keys(schema.properties)) {
36
- if (flattened[fieldName] !== undefined) {
37
- result[fieldName] = flattened[fieldName];
38
- }
26
+ function matchJsonLdToSchema(jsonLdBlocks, schema) {
27
+ if (!schema.properties || jsonLdBlocks.length === 0) return {};
28
+ const result = {};
29
+ const flattened = flattenJsonLd(jsonLdBlocks);
30
+ for (const fieldName of Object.keys(schema.properties)) {
31
+ if (flattened[fieldName] !== void 0) {
32
+ result[fieldName] = flattened[fieldName];
39
33
  }
40
- return result;
34
+ }
35
+ return result;
41
36
  }
42
37
  function flattenJsonLd(blocks) {
43
- const flat = {};
44
- for (const block of blocks) {
45
- flattenObject(block, flat);
46
- }
47
- return flat;
38
+ const flat = {};
39
+ for (const block of blocks) {
40
+ flattenObject(block, flat);
41
+ }
42
+ return flat;
48
43
  }
49
44
  function flattenObject(obj, target) {
50
- for (const [key, value] of Object.entries(obj)) {
51
- if (key.startsWith('@'))
52
- continue;
53
- // First-wins: earlier blocks and shallower keys take priority
54
- if (!(key in target)) {
55
- if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
56
- flattenObject(value, target);
57
- }
58
- else {
59
- target[key] = value;
60
- }
61
- }
45
+ for (const [key, value] of Object.entries(obj)) {
46
+ if (key.startsWith("@")) continue;
47
+ if (!(key in target)) {
48
+ if (typeof value === "object" && value !== null && !Array.isArray(value)) {
49
+ flattenObject(value, target);
50
+ } else {
51
+ target[key] = value;
52
+ }
62
53
  }
54
+ }
63
55
  }
56
+ export {
57
+ extractJsonLd,
58
+ matchJsonLdToSchema
59
+ };
64
60
  //# sourceMappingURL=jsonld.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"jsonld.js","sourceRoot":"","sources":["../../src/extraction/jsonld.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAG5C,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;AAEnC,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAG,GAAG,CAAC,gBAAgB,CAAC,oCAAoC,CAAC,CAAC;IAC3E,MAAM,OAAO,GAA8B,EAAE,CAAC;IAE9C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;YACxC,IAAI,CAAC,IAAI;gBAAE,SAAS;YAEpB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAEhC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1B,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;YAC1B,CAAC;iBAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACpC,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,+BAA+B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,YAAuC,EACvC,MAAkB;IAElB,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE/D,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;IAE9C,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;QACvD,IAAI,SAAS,CAAC,SAAS,CAAC,KAAK,SAAS,EAAE,CAAC;YACvC,MAAM,CAAC,SAAS,CAAC,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa,CACpB,MAAiC;IAEjC,MAAM,IAAI,GAA4B,EAAE,CAAC;IAEzC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,aAAa,CACpB,GAA4B,EAC5B,MAA+B;IAE/B,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QAC/C,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAElC,8DAA8D;QAC9D,IAAI,CAAC,CAAC,GAAG,IAAI,MAAM,CAAC,EAAE
,CAAC;YACrB,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzE,aAAa,CAAC,KAAgC,EAAE,MAAM,CAAC,CAAC;YAC1D,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;YACtB,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC"}
1
+ {"version":3,"sources":["../../src/extraction/jsonld.ts"],"sourcesContent":["import { parseHTML } from 'linkedom';\nimport { createLogger } from '../logger.js';\nimport type { JsonSchema } from './schema.js';\n\nconst log = createLogger('jsonld');\n\nexport function extractJsonLd(html: string): Record<string, unknown>[] {\n const { document: doc } = parseHTML(html);\n const scripts = doc.querySelectorAll('script[type=\"application/ld+json\"]');\n const results: Record<string, unknown>[] = [];\n\n for (const script of scripts) {\n try {\n const text = script.textContent?.trim();\n if (!text) continue;\n\n const parsed = JSON.parse(text);\n\n if (Array.isArray(parsed)) {\n results.push(...parsed);\n } else if (parsed['@graph'] && Array.isArray(parsed['@graph'])) {\n results.push(...parsed['@graph']);\n } else {\n results.push(parsed);\n }\n } catch (err) {\n log.warn('Failed to parse JSON-LD block', { error: String(err) });\n }\n }\n\n return results;\n}\n\nexport function matchJsonLdToSchema(\n jsonLdBlocks: Record<string, unknown>[],\n schema: JsonSchema,\n): Record<string, unknown> {\n if (!schema.properties || jsonLdBlocks.length === 0) return {};\n\n const result: Record<string, unknown> = {};\n const flattened = flattenJsonLd(jsonLdBlocks);\n\n for (const fieldName of Object.keys(schema.properties)) {\n if (flattened[fieldName] !== undefined) {\n result[fieldName] = flattened[fieldName];\n }\n }\n\n return result;\n}\n\nfunction flattenJsonLd(\n blocks: Record<string, unknown>[],\n): Record<string, unknown> {\n const flat: Record<string, unknown> = {};\n\n for (const block of blocks) {\n flattenObject(block, flat);\n }\n\n return flat;\n}\n\nfunction flattenObject(\n obj: Record<string, unknown>,\n target: Record<string, unknown>,\n): void {\n for (const [key, value] of Object.entries(obj)) {\n if (key.startsWith('@')) continue;\n\n // First-wins: earlier blocks and shallower keys take priority\n if (!(key in target)) {\n if (typeof value === 'object' && 
value !== null && !Array.isArray(value)) {\n flattenObject(value as Record<string, unknown>, target);\n } else {\n target[key] = value;\n }\n }\n }\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAG7B,MAAM,MAAM,aAAa,QAAQ;AAE1B,SAAS,cAAc,MAAyC;AACrE,QAAM,EAAE,UAAU,IAAI,IAAI,UAAU,IAAI;AACxC,QAAM,UAAU,IAAI,iBAAiB,oCAAoC;AACzE,QAAM,UAAqC,CAAC;AAE5C,aAAW,UAAU,SAAS;AAC5B,QAAI;AACF,YAAM,OAAO,OAAO,aAAa,KAAK;AACtC,UAAI,CAAC,KAAM;AAEX,YAAM,SAAS,KAAK,MAAM,IAAI;AAE9B,UAAI,MAAM,QAAQ,MAAM,GAAG;AACzB,gBAAQ,KAAK,GAAG,MAAM;AAAA,MACxB,WAAW,OAAO,QAAQ,KAAK,MAAM,QAAQ,OAAO,QAAQ,CAAC,GAAG;AAC9D,gBAAQ,KAAK,GAAG,OAAO,QAAQ,CAAC;AAAA,MAClC,OAAO;AACL,gBAAQ,KAAK,MAAM;AAAA,MACrB;AAAA,IACF,SAAS,KAAK;AACZ,UAAI,KAAK,iCAAiC,EAAE,OAAO,OAAO,GAAG,EAAE,CAAC;AAAA,IAClE;AAAA,EACF;AAEA,SAAO;AACT;AAEO,SAAS,oBACd,cACA,QACyB;AACzB,MAAI,CAAC,OAAO,cAAc,aAAa,WAAW,EAAG,QAAO,CAAC;AAE7D,QAAM,SAAkC,CAAC;AACzC,QAAM,YAAY,cAAc,YAAY;AAE5C,aAAW,aAAa,OAAO,KAAK,OAAO,UAAU,GAAG;AACtD,QAAI,UAAU,SAAS,MAAM,QAAW;AACtC,aAAO,SAAS,IAAI,UAAU,SAAS;AAAA,IACzC;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,cACP,QACyB;AACzB,QAAM,OAAgC,CAAC;AAEvC,aAAW,SAAS,QAAQ;AAC1B,kBAAc,OAAO,IAAI;AAAA,EAC3B;AAEA,SAAO;AACT;AAEA,SAAS,cACP,KACA,QACM;AACN,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,GAAG,GAAG;AAC9C,QAAI,IAAI,WAAW,GAAG,EAAG;AAGzB,QAAI,EAAE,OAAO,SAAS;AACpB,UAAI,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK,GAAG;AACxE,sBAAc,OAAkC,MAAM;AAAA,MACxD,OAAO;AACL,eAAO,GAAG,IAAI;AAAA,MAChB;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
@@ -0,0 +1,2 @@
1
+ export declare function detectCodeLanguage(classAttr: string | null | undefined): string | null;
2
+ //# sourceMappingURL=lang-hints.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lang-hints.d.ts","sourceRoot":"","sources":["../../src/extraction/lang-hints.ts"],"names":[],"mappings":"AAiBA,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,GAAG,IAAI,CAUtF"}
@@ -0,0 +1,30 @@
1
+ const ALIASES = {
2
+ typescript: "ts",
3
+ javascript: "js",
4
+ python: "py",
5
+ rust: "rs",
6
+ golang: "go",
7
+ shell: "sh"
8
+ };
9
+ const PATTERNS = [
10
+ /(?:^|\s)language-([a-z0-9+#-]+)/i,
11
+ /(?:^|\s)lang-([a-z0-9+#-]+)/i,
12
+ /(?:^|\s)hljs-([a-z0-9+#-]+)/i,
13
+ /(?:^|\s)prism-language-([a-z0-9+#-]+)/i,
14
+ /(?:^|\s)highlight-source-([a-z0-9+#-]+)/i
15
+ ];
16
+ function detectCodeLanguage(classAttr) {
17
+ if (!classAttr) return null;
18
+ for (const re of PATTERNS) {
19
+ const m = classAttr.match(re);
20
+ if (m) {
21
+ const raw = m[1].toLowerCase();
22
+ return ALIASES[raw] ?? raw;
23
+ }
24
+ }
25
+ return null;
26
+ }
27
+ export {
28
+ detectCodeLanguage
29
+ };
30
+ //# sourceMappingURL=lang-hints.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/extraction/lang-hints.ts"],"sourcesContent":["const ALIASES: Record<string, string> = {\n typescript: 'ts',\n javascript: 'js',\n python: 'py',\n rust: 'rs',\n golang: 'go',\n shell: 'sh',\n};\n\nconst PATTERNS = [\n /(?:^|\\s)language-([a-z0-9+#-]+)/i,\n /(?:^|\\s)lang-([a-z0-9+#-]+)/i,\n /(?:^|\\s)hljs-([a-z0-9+#-]+)/i,\n /(?:^|\\s)prism-language-([a-z0-9+#-]+)/i,\n /(?:^|\\s)highlight-source-([a-z0-9+#-]+)/i,\n];\n\nexport function detectCodeLanguage(classAttr: string | null | undefined): string | null {\n if (!classAttr) return null;\n for (const re of PATTERNS) {\n const m = classAttr.match(re);\n if (m) {\n const raw = m[1].toLowerCase();\n return ALIASES[raw] ?? raw;\n }\n }\n return null;\n}\n"],"mappings":"AAAA,MAAM,UAAkC;AAAA,EACtC,YAAY;AAAA,EACZ,YAAY;AAAA,EACZ,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,OAAO;AACT;AAEA,MAAM,WAAW;AAAA,EACf;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,SAAS,mBAAmB,WAAqD;AACtF,MAAI,CAAC,UAAW,QAAO;AACvB,aAAW,MAAM,UAAU;AACzB,UAAM,IAAI,UAAU,MAAM,EAAE;AAC5B,QAAI,GAAG;AACL,YAAM,MAAM,EAAE,CAAC,EAAE,YAAY;AAC7B,aAAO,QAAQ,GAAG,KAAK;AAAA,IACzB;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
@@ -0,0 +1,17 @@
1
+ import type { LLMExtractResult } from '../integrations/cloud/llm/types.js';
2
+ export interface LLMFallbackBudget {
3
+ remaining: number;
4
+ }
5
+ export interface LLMFallbackInput {
6
+ html: string;
7
+ jsonSchema: Record<string, unknown>;
8
+ partial: Record<string, unknown>;
9
+ missing: string[];
10
+ signal?: AbortSignal;
11
+ budget?: LLMFallbackBudget;
12
+ }
13
+ export interface LLMFallbackResult extends LLMExtractResult {
14
+ warnings: string[];
15
+ }
16
+ export declare function extractWithLLM(input: LLMFallbackInput): Promise<LLMFallbackResult>;
17
+ //# sourceMappingURL=llm-fallback.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-fallback.d.ts","sourceRoot":"","sources":["../../src/extraction/llm-fallback.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,gBAAgB,EAAe,MAAM,oCAAoC,CAAC;AAKxF,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B;AAED,MAAM,WAAW,iBAAkB,SAAQ,gBAAgB;IACzD,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAeD,wBAAsB,cAAc,CAClC,KAAK,EAAE,gBAAgB,GACtB,OAAO,CAAC,iBAAiB,CAAC,CAsF5B"}