@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,35 @@
1
+ /**
2
+ * SearXNG Search Provider
3
+ *
4
+ * Connects to a self-hosted SearXNG instance (running on Mac Mini with residential IP,
5
+ * exposed via Cloudflare Tunnel). SearXNG aggregates Google, Bing, Brave, Startpage, etc.
6
+ * and is not rate-limited or blocked since it runs on a residential IP.
7
+ *
8
+ * Config (env vars):
9
+ * SEARXNG_URL — Base URL of SearXNG instance (e.g. https://search.webpeel.dev)
10
+ *
11
+ * Falls back gracefully if SEARXNG_URL is not set or instance is unreachable.
12
+ */
13
+ export interface SearXNGSearchResult { // One de-duplicated search hit as returned by searchViaSearXNG().
14
+ title: string; // Result title; entries without a title are dropped by searchViaSearXNG().
15
+ url: string; // Result URL exactly as reported by SearXNG; entries without a URL are dropped.
16
+ description?: string; // Taken from the SearXNG result's `content` field when present.
17
+ publishedDate?: string; // Passed through from the SearXNG result's `publishedDate` field; format not normalised here.
18
+ score?: number; // Passed through from the SearXNG result's `score` field.
19
+ imageUrl?: string; // SearXNG `img_src`, falling back to `thumbnail`.
20
+ }
21
+ /**
22
+ * Fetches search results from a SearXNG instance.
23
+ * Returns results compatible with WebSearchResult interface in search-provider.ts.
24
+ */
25
+ export declare function searchViaSearXNG(query: string, options?: { // Resolves to [] when SEARXNG_URL is unset, on non-OK HTTP status, or on fetch error/timeout.
26
+ count?: number; // Maximum number of results returned; the implementation defaults to 10.
27
+ signal?: AbortSignal; // Caller-supplied abort signal; an abort is forwarded to the request's internal AbortController.
28
+ timeoutMs?: number; // Request timeout in milliseconds; the implementation defaults to 15000.
29
+ engines?: string; // Sent as the `engines` query parameter only when non-empty (default '').
30
+ language?: string; // Sent as the `language` query parameter; the implementation defaults to 'en'.
31
+ }): Promise<SearXNGSearchResult[]>;
32
+ /**
33
+ * Quick health check — true if SearXNG is reachable and returning results.
34
+ */
35
+ export declare function isSearXNGHealthy(): Promise<boolean>; // Implementation runs searchViaSearXNG('test', { count: 1, timeoutMs: 10000 }) and resolves false on an empty result or any error.
@@ -0,0 +1,105 @@
1
+ /**
2
+ * SearXNG Search Provider
3
+ *
4
+ * Connects to a self-hosted SearXNG instance (running on Mac Mini with residential IP,
5
+ * exposed via Cloudflare Tunnel). SearXNG aggregates Google, Bing, Brave, Startpage, etc.
6
+ * and is not rate-limited or blocked since it runs on a residential IP.
7
+ *
8
+ * Config (env vars):
9
+ * SEARXNG_URL — Base URL of SearXNG instance (e.g. https://search.webpeel.dev)
10
+ *
11
+ * Falls back gracefully if SEARXNG_URL is not set or instance is unreachable.
12
+ */
13
+ import { fetch as undiciFetch } from 'undici';
14
+ import { createLogger } from './logger.js';
15
+ const log = createLogger('searxng');
16
/**
 * Fetches search results from a SearXNG instance.
 * Returns results compatible with WebSearchResult interface in search-provider.ts.
 *
 * Best-effort by design: every failure mode (SEARXNG_URL unset, caller abort,
 * timeout, non-OK HTTP status, malformed payload, network error) resolves to
 * an empty array instead of rejecting, so callers can fall back to another
 * provider.
 *
 * @param {string} query - Search query, sent as the `q` parameter.
 * @param {object} [options]
 * @param {number} [options.count=10] - Maximum number of results to return.
 * @param {AbortSignal} [options.signal] - Caller-controlled cancellation.
 * @param {number} [options.timeoutMs=15000] - Request timeout in milliseconds.
 * @param {string} [options.engines=''] - Value for the `engines` parameter; omitted when empty.
 * @param {string} [options.language='en'] - Value for the `language` parameter.
 * @returns {Promise<Array<{title: string, url: string, description?: string,
 *   publishedDate?: string, score?: number, imageUrl?: string}>>}
 *   De-duplicated results in the order SearXNG returned them.
 */
export async function searchViaSearXNG(query, options = {}) {
    const baseUrl = process.env.SEARXNG_URL;
    if (!baseUrl)
        return [];
    const { count = 10, signal, timeoutMs = 15000, engines = '', language = 'en', } = options;
    // An 'abort' listener never fires for a signal that is already aborted,
    // so honour that state up front instead of issuing the request anyway.
    if (signal?.aborted) {
        log.debug('aborted by caller before request');
        return [];
    }
    const controller = new AbortController();
    const onCallerAbort = () => controller.abort();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    signal?.addEventListener('abort', onCallerAbort, { once: true });
    try {
        const params = new URLSearchParams({
            q: query,
            format: 'json',
            language,
            safesearch: '0',
            categories: 'general',
        });
        if (engines)
            params.set('engines', engines);
        const url = `${baseUrl.replace(/\/$/, '')}/search?${params.toString()}`;
        const response = await undiciFetch(url, {
            signal: controller.signal,
            headers: {
                'Accept': 'application/json',
                'User-Agent': 'WebPeel/1.0 (internal search aggregator)',
            },
        });
        if (!response.ok) {
            log.debug(`HTTP ${response.status}`);
            return [];
        }
        const data = (await response.json());
        // Treat a missing or non-array `results` payload as "no results".
        const results = Array.isArray(data?.results) ? data.results : [];
        if (results.length === 0) {
            log.debug('0 results returned');
            return [];
        }
        const seen = new Set();
        const output = [];
        for (const r of results) {
            // Check the limit before pushing so that count <= 0 yields nothing.
            if (output.length >= count)
                break;
            // Skip malformed entries individually rather than losing the whole batch.
            if (typeof r?.url !== 'string' || !r.url || !r.title)
                continue;
            // Dedupe key ignores a single trailing slash and letter case.
            const normalized = r.url.replace(/\/$/, '').toLowerCase();
            if (seen.has(normalized))
                continue;
            seen.add(normalized);
            output.push({
                title: r.title,
                url: r.url,
                description: r.content ?? undefined,
                publishedDate: r.publishedDate ?? undefined,
                score: r.score ?? undefined,
                imageUrl: r.img_src ?? r.thumbnail ?? undefined,
            });
        }
        log.debug(`${output.length} results for "${query.substring(0, 40)}"`);
        return output;
    }
    catch (e) {
        const msg = e instanceof Error ? e.message : String(e);
        if (signal?.aborted) {
            // Caller cancellation is not a timeout; log it as such.
            log.debug('aborted by caller');
        }
        else if (msg.includes('abort') || msg.includes('timeout') || msg.includes('AbortError')) {
            log.debug(`timed out after ${timeoutMs}ms`);
        }
        else {
            log.debug('fetch error:', msg);
        }
        return [];
    }
    finally {
        clearTimeout(timeoutId);
        // Detach from the caller's signal so long-lived signals do not accumulate listeners.
        signal?.removeEventListener('abort', onCallerAbort);
    }
}
94
/**
 * Quick health check — true if SearXNG is reachable and returning results.
 *
 * Issues a single probe search for 'test' (one result, 10 s timeout) and
 * reports whether anything came back. Any error is reported as unhealthy
 * rather than propagated.
 *
 * @returns {Promise<boolean>} true when the probe yields at least one result.
 */
export async function isSearXNGHealthy() {
    const probeOptions = { count: 1, timeoutMs: 10000 };
    try {
        const { length } = await searchViaSearXNG('test', probeOptions);
        return length > 0;
    }
    catch {
        return false;
    }
}
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Selective Evidence Aggregation
3
+ *
4
+ * AttnRes-inspired evidence selection: instead of naively concatenating all
5
+ * sources, score and select evidence blocks that maximise relevance,
6
+ * credibility, and source diversity for a given query.
7
+ *
8
+ * Design goals:
9
+ * 1. Query-aware block scoring — BM25 relevance per content block
10
+ * 2. Credibility/authority weighting — higher-authority sources get a boost
11
+ * 3. Structured-signal detection — detect structured data even when
12
+ * domainData.structured is absent (prices, dates, tables, lists, JSON-LD)
13
+ * 4. Per-domain diversity limits — configurable cap per registered domain
14
+ * 5. Query-type-aware policy — factual vs exploratory queries use
15
+ * different diversity/concentration knobs
16
+ * 6. Exact facts preserved — numbers, prices, dates are never mutated
17
+ *
18
+ * No external dependencies — pure TypeScript, reuses existing helpers.
19
+ */
20
+ /** A single candidate source (URL, title, fetched content and optional extras) offered to selectEvidence() */
21
+ export interface EvidenceSource {
22
+ url: string; // used to derive the registered domain and the authority score
23
+ title: string; // copied onto every block selected from this source
24
+ content: string; // fetched page text; skipped when empty or an error/blocked placeholder
25
+ /** Search-result snippet; used instead of content when content is unusable, provided it is at least 20 characters after trimming */
26
+ snippet?: string;
27
+ /** Pre-computed structured data from domain extractors; a non-null value whose string/JSON form is longer than 5 characters boosts the source's structured score */
28
+ structured?: unknown;
29
+ /** Page metadata (publish dates, etc.); not consulted by the selector's scoring */
30
+ metadata?: Record<string, unknown>;
31
+ }
32
+ /** A scored evidence block that survived selection */
33
+ export interface SelectedBlock {
34
+ /** The block text as found in the source; only the first selected block may be cut to fit the character budget */
35
+ text: string;
36
+ /** Source URL this block came from */
37
+ sourceUrl: string;
38
+ /** Title of the source this block came from */
39
+ sourceTitle: string;
40
+ /** Composite score used for ranking: policy-weighted sum of relevance, authority and structured scores (nominally 0-1) */
41
+ score: number;
42
+ /** True when the combined source-level and block-level structured score exceeds 0.15 */
43
+ hasStructuredSignal: boolean;
44
+ /** Whether the selector used full page content or a snippet fallback */
45
+ contentMode: 'content' | 'snippet';
46
+ }
47
+ /** Result of selectEvidence() */
48
+ export interface SelectionResult {
49
+ /** Selected evidence blocks, in score-descending order */
50
+ blocks: SelectedBlock[];
51
+ /** Number of candidate blocks that were scored (blocks shorter than 30 characters are skipped and not counted) */
52
+ totalCandidates: number;
53
+ /** Number of distinct source URLs among the selected blocks */
54
+ sourcesUsed: number;
55
+ /** The policy that was applied: the auto-detected policy with any policyOverride fields merged over it */
56
+ policy: QueryPolicy;
57
+ }
58
+ export type QueryType = 'factual' | 'comparison' | 'exploratory'; // query categories assigned by classifyQuery()
59
+ export interface QueryPolicy {
60
+ /** The detected query type (queries matching no pattern are reported as 'exploratory') */
61
+ type: QueryType;
62
+ /** Max blocks selected from any single registered domain */
63
+ maxBlocksPerDomain: number;
64
+ /** Weight applied to the domain authority score in the composite sum (0-1) */
65
+ authorityWeight: number;
66
+ /** Weight applied to the normalized BM25 relevance score in the composite sum (0-1) */
67
+ relevanceWeight: number;
68
+ /** Weight applied to the structured-signal score in the composite sum (0-1) */
69
+ structuredWeight: number;
70
+ /** Minimum number of unique domains to try to include (best effort; a swapped-in block must score at least 60% of the block it replaces) */
71
+ minDomains: number;
72
+ }
73
+ /**
74
+ * Classify a query and return the appropriate diversity/weighting policy.
75
+ *
+ * Checks run in the order factual, comparison, exploratory; the first match
+ * wins. A query matching none of them gets a balanced default policy that is
+ * also typed 'exploratory'.
+ *
76
+ * - **factual**: pricing, version, limit, spec queries → tight authority
77
+ * concentration, fewer domains needed, structured signals weighted high
78
+ * - **comparison**: "X vs Y", "alternatives", "pros and cons" → moderate
79
+ * diversity, balanced weights
80
+ * - **exploratory**: "how does X work", "explain Y", research queries →
81
+ * maximum diversity, many domains encouraged
82
+ */
83
+ export declare function classifyQuery(query: string): QueryPolicy;
84
+ /**
85
+ * Detect whether a text block contains structured information signals.
86
+ *
87
+ * This does NOT rely on domainData.structured being present — it looks at
88
+ * the actual content for patterns that indicate structured data:
89
+ * - Price/currency patterns ($X.XX, €, £, ¥)
90
+ * - Markdown tables (two or more lines of the form |…|)
91
+ * - Key-value patterns (two or more "Key: Value" lines)
92
+ * - Numeric data density (percentages, measurements, dates)
93
+ * - JSON-LD or schema.org markers
94
+ * - Dotted version strings (v2.0, version 4.1)
95
+ *
96
+ * Returns a score 0-1 representing structured signal strength; empty input
+ * and input shorter than 10 characters score 0.
97
+ */
98
+ export declare function detectStructuredSignal(text: string): number;
99
+ /**
100
+ * Compute a structured signal score for a source, combining:
101
+ * 1. Pre-existing structured data (source.structured) if present — adds 0.5
+ * when its string/JSON form is longer than 5 characters
102
+ * 2. Content-derived structured signals from detectStructuredSignal() —
+ * contributes half of that function's score
103
+ *
104
+ * Returns 0-1.
105
+ */
106
+ export declare function sourceStructuredScore(source: EvidenceSource): number;
107
+ /**
108
+ * Returns true when fetched content is a WebPeel placeholder / error shell rather
109
+ * than usable evidence for synthesis. Null, undefined, empty and whitespace-only
+ * input is also reported as unusable.
110
+ */
111
+ export declare function isUnusableEvidenceContent(text: string | undefined | null): boolean;
112
+ /**
113
+ * Choose the best evidence text for a source.
114
+ * - Prefer full fetched content when it is usable (returned as-is, mode 'content')
115
+ * - Fall back to the trimmed search snippet when the fetch content is a
+ * blocked/error placeholder and the snippet has at least 20 characters
+ * - Otherwise return empty text with mode 'none'
116
+ */
117
+ export declare function getBestEvidenceText(source: EvidenceSource): {
118
+ text: string;
119
+ mode: 'content' | 'snippet' | 'none';
120
+ };
121
+ export interface SelectEvidenceOptions {
122
+ /** The user query (classified into a policy and tokenized for BM25 scoring) */
123
+ query: string;
124
+ /** All candidate sources */
125
+ sources: EvidenceSource[];
126
+ /** Maximum total blocks to return. Default: 12 */
127
+ maxBlocks?: number;
128
+ /** Maximum character budget for all selected blocks combined. Default: 6000 */
129
+ maxChars?: number;
130
+ /** Fields to override on the auto-detected policy (merged over it; unspecified fields keep their detected values) */
131
+ policyOverride?: Partial<QueryPolicy>;
132
+ }
133
+ /**
134
+ * Select the best evidence blocks from multiple sources for a given query.
135
+ *
136
+ * Pipeline:
137
+ * 1. Classify query → policy (diversity caps, weight distribution)
138
+ * 2. For each source: split into blocks, score BM25 against query
139
+ * 3. Compute composite score per block: weighted sum of relevance, authority
+ * and structured-signal scores, using the policy weights
140
+ * 4. Apply per-domain diversity cap
141
+ * 5. Ensure minimum domain diversity (promote under-represented domains)
142
+ * 6. Return top blocks within budget
143
+ */
144
+ export declare function selectEvidence(options: SelectEvidenceOptions): SelectionResult;
145
+ /**
146
+ * Format selected evidence blocks into a numbered, source-attributed string
147
+ * suitable for LLM context injection.
148
+ *
+ * Blocks are grouped by source URL; each group gets a "[n] title" header
+ * (tagged " [structured]" and/or " [snippet]" where applicable) and a URL
+ * line, and groups are separated by a "---" line. Returns '' for no blocks.
+ *
149
+ * Preserves exact facts/numbers — no summarization or transformation.
150
+ */
151
+ export declare function formatEvidenceForLLM(result: SelectionResult): string;
@@ -0,0 +1,389 @@
1
+ /**
2
+ * Selective Evidence Aggregation
3
+ *
4
+ * AttnRes-inspired evidence selection: instead of naively concatenating all
5
+ * sources, score and select evidence blocks that maximise relevance,
6
+ * credibility, and source diversity for a given query.
7
+ *
8
+ * Design goals:
9
+ * 1. Query-aware block scoring — BM25 relevance per content block
10
+ * 2. Credibility/authority weighting — higher-authority sources get a boost
11
+ * 3. Structured-signal detection — detect structured data even when
12
+ * domainData.structured is absent (prices, dates, tables, lists, JSON-LD)
13
+ * 4. Per-domain diversity limits — configurable cap per registered domain
14
+ * 5. Query-type-aware policy — factual vs exploratory queries use
15
+ * different diversity/concentration knobs
16
+ * 6. Exact facts preserved — numbers, prices, dates are never mutated
17
+ *
18
+ * No external dependencies — pure TypeScript, reuses existing helpers.
19
+ */
20
+ import { splitIntoBlocks, scoreBM25 } from './bm25-filter.js';
21
+ import { scoreDomainAuthority, extractRegisteredDomain, isFactualQuery, } from './source-scoring.js';
22
// Wording that signals a comparison / evaluation style query.
const COMPARISON_PATTERN = /\b(compare|comparison|vs\.?|versus|difference|differences|between|pros?\s+and\s+cons?|alternatives?|better|which\s+is|review|benchmark|ranking)\b/i;
// Wording that signals an open-ended, explanatory query.
const EXPLORATORY_PATTERN = /\b(how\s+(?:does|do|to|can)|what\s+(?:is|are|does)|explain|overview|introduction|guide|tutorial|learn|understand|history|background|research|explore|survey)\b/i;
/**
 * Map a query onto the diversity/weighting policy used by the selector.
 *
 * The checks are evaluated lazily and in a fixed order — factual first, then
 * comparison, then exploratory — so the first matching category wins:
 *
 * - **factual**: up to 4 blocks per domain, heaviest authority and structured
 *   weights, only 2 domains required
 * - **comparison**: up to 3 blocks per domain, balanced weights, 3 domains
 * - **exploratory**: up to 2 blocks per domain, relevance-dominated, 4 domains
 *
 * A query matching none of the above receives the balanced (comparison-like)
 * knobs but is still reported with type 'exploratory'.
 *
 * @param {string} query - The user query.
 * @returns {object} A fresh policy object for this query.
 */
export function classifyQuery(query) {
    const category = isFactualQuery(query)
        ? 'factual'
        : COMPARISON_PATTERN.test(query)
            ? 'comparison'
            : EXPLORATORY_PATTERN.test(query)
                ? 'exploratory'
                : 'unmatched';
    switch (category) {
        case 'factual':
            return {
                type: 'factual',
                maxBlocksPerDomain: 4,
                authorityWeight: 0.35,
                relevanceWeight: 0.40,
                structuredWeight: 0.25,
                minDomains: 2,
            };
        case 'comparison':
            return {
                type: 'comparison',
                maxBlocksPerDomain: 3,
                authorityWeight: 0.25,
                relevanceWeight: 0.45,
                structuredWeight: 0.15,
                minDomains: 3,
            };
        case 'exploratory':
            return {
                type: 'exploratory',
                maxBlocksPerDomain: 2,
                authorityWeight: 0.20,
                relevanceWeight: 0.50,
                structuredWeight: 0.10,
                minDomains: 4,
            };
        default:
            // No pattern matched: balanced knobs, labelled exploratory.
            return {
                type: 'exploratory',
                maxBlocksPerDomain: 3,
                authorityWeight: 0.25,
                relevanceWeight: 0.45,
                structuredWeight: 0.15,
                minDomains: 3,
            };
    }
}
78
+ // ---------------------------------------------------------------------------
79
+ // Structured-signal detection (lightweight, no giant dependency)
80
+ // ---------------------------------------------------------------------------
81
/**
 * Estimate how strongly a text block looks like structured, fact-dense data.
 *
 * Works purely on the text itself and adds up independent, individually capped
 * contributions:
 *   - Prices: a currency symbol ($, €, £, ¥) followed by a number — 0.1 each, max 0.3
 *   - Markdown table rows (lines of the form |…|), two or more — 0.05 each, max 0.3
 *   - "Label: Value" lines, two or more — 0.04 each, max 0.2
 *   - Numbers with a percent sign or unit (GB, ms, days, …), two or more —
 *     0.04 each, max 0.2
 *   - A dotted version string (v2.0, version 4.1) — 0.1
 *   - JSON-LD / schema.org / microdata markers — 0.15
 *
 * @param {string} text - Block text to inspect.
 * @returns {number} Signal strength in the range 0-1; 0 for empty input or
 *   input shorter than 10 characters.
 */
export function detectStructuredSignal(text) {
    if (!text || text.length < 10)
        return 0;
    let signal = 0;
    const lines = text.split('\n');
    // Price/currency patterns — strong signal.
    // `*` (not `+`) after the first digit so single-digit prices such as "$5" count.
    const priceMatches = text.match(/[$€£¥]\s?\d[\d,.]*/g);
    if (priceMatches && priceMatches.length > 0) {
        signal += Math.min(0.3, priceMatches.length * 0.1);
    }
    // Markdown table rows (|col1|col2|)
    const tableRows = lines.filter(l => /^\s*\|.*\|/.test(l));
    if (tableRows.length >= 2) {
        signal += Math.min(0.3, tableRows.length * 0.05);
    }
    // Key-value patterns ("Label: Value" at start of line)
    const kvMatches = lines.filter(l => /^\s*[A-Z][A-Za-z\s]{1,25}:\s+\S/.test(l));
    if (kvMatches.length >= 2) {
        signal += Math.min(0.2, kvMatches.length * 0.04);
    }
    // Numeric data density — percentages and measurements.
    // The trailing \b applies to the alphabetic units only: "%" is not a word
    // character, so "50% " has no word boundary after it and would never match.
    // \d+ (rather than \d{1,3}) lets ungrouped numbers such as "1024 MB" match.
    const numericPatterns = text.match(/\b\d+(?:[.,]\d+)*\s*(?:%|(?:GB|MB|TB|kg|lb|mph|km|mi|ms|sec|min|hr|days?|months?|years?)\b)/gi);
    if (numericPatterns && numericPatterns.length >= 2) {
        signal += Math.min(0.2, numericPatterns.length * 0.04);
    }
    // Explicit dotted version patterns (v2.0, version 4.1)
    if (/\bv(?:ersion)?\s?\d+(?:\.\d+)+/i.test(text)) {
        signal += 0.1;
    }
    // JSON-LD / schema.org markers
    if (/@context|schema\.org|itemtype|itemprop/i.test(text)) {
        signal += 0.15;
    }
    return Math.min(1.0, signal);
}
130
/**
 * Compute a structured signal score for a source, combining:
 *   1. Pre-existing structured data (`source.structured`): adds 0.5 when its
 *      string form (the value itself for strings, JSON otherwise) is longer
 *      than 5 characters, i.e. more than a trivial `{}` / `[]`.
 *   2. Content-derived signal: half of detectStructuredSignal(source.content).
 *
 * `structured` is caller-supplied and untyped, so values that cannot be
 * serialized (circular references, BigInt, functions, symbols) are treated as
 * "no structured data" instead of throwing.
 *
 * @param {object} source - Evidence source ({ content, structured?, ... }).
 * @returns {number} Score in the range 0-1.
 */
export function sourceStructuredScore(source) {
    let score = 0;
    // If domain extractor provided structured data, strong signal
    if (source.structured != null) {
        let serialized = '';
        if (typeof source.structured === 'string') {
            serialized = source.structured;
        }
        else {
            try {
                // JSON.stringify yields undefined for functions/symbols and
                // throws on cycles or BigInt values.
                serialized = JSON.stringify(source.structured) ?? '';
            }
            catch {
                serialized = '';
            }
        }
        // Non-trivial structured data (more than just {})
        if (serialized.length > 5) {
            score += 0.5;
        }
    }
    // Content-derived structured signal
    const contentSignal = detectStructuredSignal(source.content || '');
    score += contentSignal * 0.5;
    return Math.min(1.0, score);
}
154
+ // ---------------------------------------------------------------------------
155
+ // Evidence quality / fallback helpers
156
+ // ---------------------------------------------------------------------------
157
// Markers of placeholder / error output that must not be treated as evidence.
const UNUSABLE_EVIDENCE_PATTERNS = [
    /^#\s*⚠️\s+.+?\s+—\s+Access Blocked/im,
    /This site uses advanced bot protection and blocked our request\./i,
    /^##\s*❌\s+Reddit Post Not Found/im,
    /The post at r\/.+ could not be found\./i,
    /Server returned an error page \(522\)/i,
    /fetch_failed/i,
];
/**
 * Decide whether fetched text is worthless as evidence.
 *
 * Text is unusable when it is missing, blank after trimming, or matches any of
 * the known placeholder / error markers in UNUSABLE_EVIDENCE_PATTERNS.
 *
 * @param {string | undefined | null} text - Fetched content to check.
 * @returns {boolean} `true` when the text should not be used for synthesis.
 */
export function isUnusableEvidenceContent(text) {
    const body = text ? text.trim() : '';
    if (body === '') {
        return true;
    }
    for (const marker of UNUSABLE_EVIDENCE_PATTERNS) {
        if (marker.test(body)) {
            return true;
        }
    }
    return false;
}
177
/**
 * Pick the text that should represent a source as evidence.
 *
 * - Usable fetched content wins and is returned untouched (mode 'content').
 * - Otherwise the trimmed search snippet is used when it has at least
 *   20 characters (mode 'snippet').
 * - Otherwise there is nothing to use: empty text with mode 'none'.
 *
 * @param {object} source - Evidence source ({ content, snippet? }).
 * @returns {{ text: string, mode: 'content' | 'snippet' | 'none' }}
 */
export function getBestEvidenceText(source) {
    const contentIsUsable = !isUnusableEvidenceContent(source.content);
    if (contentIsUsable) {
        return { text: source.content, mode: 'content' };
    }
    const fallback = source.snippet?.trim() ?? '';
    return fallback.length >= 20
        ? { text: fallback, mode: 'snippet' }
        : { text: '', mode: 'none' };
}
192
+ // ---------------------------------------------------------------------------
193
+ // Main selector
194
+ // ---------------------------------------------------------------------------
195
/**
 * Select the best evidence blocks from multiple sources for a given query.
 *
 * Pipeline:
 *   1. Classify the query into a policy (per-domain cap, score weights, minimum
 *      domain count) and merge `policyOverride` over it.
 *   2. For each source, pick its usable text (content, or snippet fallback),
 *      split it into blocks and score the blocks against the query terms.
 *   3. Compute a composite score per block as a weighted sum:
 *      relevance * relevanceWeight + authority * authorityWeight +
 *      structured * structuredWeight.
 *   4. Greedily take the highest-scoring blocks while honouring the per-domain
 *      cap, `maxBlocks` and `maxChars`.
 *   5. If fewer than `policy.minDomains` domains are represented, swap the
 *      weakest block of the most-represented domain for the best block of a
 *      missing domain — only when the newcomer scores at least 60% of the block
 *      it replaces AND still fits the character budget.
 *   6. Return the selection in score-descending order.
 *
 * Block text is never rewritten; the only alteration is cutting the very first
 * selected block when it alone exceeds `maxChars`.
 *
 * @param {object} options
 * @param {string} options.query - The user query.
 * @param {Array<object>} options.sources - Candidate sources.
 * @param {number} [options.maxBlocks=12] - Maximum number of blocks returned.
 * @param {number} [options.maxChars=6000] - Character budget across all blocks.
 * @param {object} [options.policyOverride] - Policy fields to override.
 * @returns {{ blocks: Array<object>, totalCandidates: number, sourcesUsed: number, policy: object }}
 */
export function selectEvidence(options) {
    const { query, sources, maxBlocks = 12, maxChars = 6000, policyOverride, } = options;
    // Step 1: Classify query and build policy
    const basePolicy = classifyQuery(query);
    const policy = { ...basePolicy, ...policyOverride };
    if (sources.length === 0) {
        return { blocks: [], totalCandidates: 0, sourcesUsed: 0, policy };
    }
    // Tokenize query for BM25
    const queryTerms = query
        .toLowerCase()
        .replace(/[^\w\s]/g, ' ')
        .split(/\s+/)
        .filter(t => t.length > 1);
    // Steps 2-3: build scored candidates
    const candidates = [];
    for (const source of sources) {
        const bestText = getBestEvidenceText(source);
        const text = bestText.text;
        if (!text || text.length < 20)
            continue;
        const blocks = splitIntoBlocks(text);
        if (blocks.length === 0)
            continue;
        const bm25Scores = queryTerms.length > 0
            ? scoreBM25(blocks, queryTerms)
            : blocks.map(() => 0.1); // small baseline when no query terms
        const authority = scoreDomainAuthority(source.url);
        // Score the text actually used (content or snippet), not the raw content field.
        const structuredSrc = sourceStructuredScore({ ...source, content: text });
        const domain = extractRegisteredDomain(source.url);
        for (let i = 0; i < blocks.length; i++) {
            const raw = blocks[i].raw;
            // Skip very short blocks (nav fragments, single words)
            if (raw.length < 30)
                continue;
            // Squash a positive BM25 score into the 0-1 range
            const rawBm25 = bm25Scores[i];
            const normBm25 = rawBm25 > 0
                ? 2 / (1 + Math.exp(-rawBm25 * 4)) - 1
                : 0;
            // Blend the source-level structured score with this block's own signal
            const blockStructured = detectStructuredSignal(raw);
            const combinedStructured = Math.min(1.0, structuredSrc * 0.6 + blockStructured * 0.4);
            // Composite: weighted sum per policy
            const composite = normBm25 * policy.relevanceWeight +
                authority * policy.authorityWeight +
                combinedStructured * policy.structuredWeight;
            candidates.push({
                text: raw,
                sourceUrl: source.url,
                sourceTitle: source.title,
                domain,
                bm25Score: normBm25,
                authorityScore: authority,
                structuredScore: combinedStructured,
                compositeScore: composite,
                hasStructuredSignal: combinedStructured > 0.15,
                contentMode: bestText.mode === 'snippet' ? 'snippet' : 'content',
            });
        }
    }
    const totalCandidates = candidates.length;
    if (totalCandidates === 0) {
        return { blocks: [], totalCandidates: 0, sourcesUsed: 0, policy };
    }
    // Step 4: Sort by composite score, apply per-domain cap
    candidates.sort((a, b) => b.compositeScore - a.compositeScore);
    const domainBlockCounts = new Map();
    const selected = [];
    let charBudget = maxChars;
    for (const c of candidates) {
        if (selected.length >= maxBlocks)
            break;
        if (charBudget <= 0)
            break;
        const domainCount = domainBlockCounts.get(c.domain) ?? 0;
        if (domainCount >= policy.maxBlocksPerDomain)
            continue;
        // Don't exceed char budget
        if (c.text.length > charBudget) {
            // Nothing selected yet: take a truncated copy of the top block rather
            // than returning no evidence at all.
            if (selected.length === 0) {
                selected.push({ ...c, text: c.text.substring(0, charBudget) });
                charBudget = 0;
                domainBlockCounts.set(c.domain, domainCount + 1);
            }
            continue;
        }
        selected.push(c);
        charBudget -= c.text.length;
        domainBlockCounts.set(c.domain, domainCount + 1);
    }
    // Step 5: Ensure minimum domain diversity
    // If we haven't hit minDomains, try to swap in blocks from under-represented domains
    const selectedDomains = new Set(selected.map(s => s.domain));
    let swapped = false;
    if (selectedDomains.size < policy.minDomains && selected.length > 1) {
        // Find domains not yet represented
        const allDomains = new Set(candidates.map(c => c.domain));
        const missingDomains = [...allDomains].filter(d => !selectedDomains.has(d));
        for (const missingDomain of missingDomains) {
            if (selectedDomains.size >= policy.minDomains)
                break;
            // Best block from this domain (candidates are sorted best-first)
            const domainBest = candidates.find(c => c.domain === missingDomain && !selected.includes(c));
            if (!domainBest || domainBest.compositeScore <= 0)
                continue;
            // Recount on every pass: earlier swaps change the distribution
            const domainCounts = new Map();
            for (const s of selected) {
                domainCounts.set(s.domain, (domainCounts.get(s.domain) ?? 0) + 1);
            }
            // Find the domain with the most blocks
            let maxDomain = '';
            let maxCount = 0;
            for (const [d, c] of domainCounts) {
                if (c > maxCount) {
                    maxCount = c;
                    maxDomain = d;
                }
            }
            // Only swap if the over-represented domain has 2+ blocks
            if (maxCount < 2)
                continue;
            // Find the worst block from that domain
            const worstIdx = selected.reduce((worst, s, i) => {
                if (s.domain !== maxDomain)
                    return worst;
                if (worst === -1)
                    return i;
                return s.compositeScore < selected[worst].compositeScore ? i : worst;
            }, -1);
            if (worstIdx === -1)
                continue;
            const worstBlock = selected[worstIdx];
            // The swap frees the replaced block's characters; the newcomer must
            // fit into what is then available so maxChars stays respected.
            const budgetAfterRemoval = charBudget + worstBlock.text.length;
            if (domainBest.text.length > budgetAfterRemoval)
                continue;
            // Only swap if the replacement isn't more than 40% worse
            if (domainBest.compositeScore >= worstBlock.compositeScore * 0.6) {
                selected[worstIdx] = domainBest;
                selectedDomains.add(missingDomain);
                charBudget = budgetAfterRemoval - domainBest.text.length;
                swapped = true;
            }
        }
    }
    // In-place swaps can break the ordering established by the greedy pass;
    // restore the score-descending order promised to callers.
    if (swapped) {
        selected.sort((a, b) => b.compositeScore - a.compositeScore);
    }
    // Build result
    const sourcesUsed = new Set(selected.map(s => s.sourceUrl)).size;
    const blocks = selected.map(c => ({
        text: c.text,
        sourceUrl: c.sourceUrl,
        sourceTitle: c.sourceTitle,
        score: c.compositeScore,
        hasStructuredSignal: c.hasStructuredSignal,
        contentMode: c.contentMode,
    }));
    return { blocks, totalCandidates, sourcesUsed, policy };
}
359
+ // ---------------------------------------------------------------------------
360
+ // Convenience: format selected evidence for LLM context
361
+ // ---------------------------------------------------------------------------
362
/**
 * Render a selection result as plain text for LLM context injection.
 *
 * Blocks are bucketed by source URL in first-appearance order. Each source
 * becomes one section: a "[n] title" header (with " [structured]" when any of
 * its blocks carries a structured signal and " [snippet]" when all of its
 * blocks came from a snippet), a "URL:" line, then the block texts separated by
 * blank lines. Sections are joined with a "---" divider.
 *
 * Block text is emitted verbatim — no summarization or transformation.
 *
 * @param {{ blocks: Array<object> }} result - Output of selectEvidence().
 * @returns {string} The formatted evidence, or '' when there are no blocks.
 */
export function formatEvidenceForLLM(result) {
    if (result.blocks.length === 0) {
        return '';
    }
    // Bucket blocks per source URL; Map preserves first-seen order.
    const bySource = new Map();
    for (const block of result.blocks) {
        const bucket = bySource.get(block.sourceUrl);
        if (bucket) {
            bucket.push(block);
        }
        else {
            bySource.set(block.sourceUrl, [block]);
        }
    }
    const sections = Array.from(bySource, ([url, group], position) => {
        const heading = group[0].sourceTitle;
        const structuredTag = group.some(b => b.hasStructuredSignal) ? ' [structured]' : '';
        const snippetTag = group.every(b => b.contentMode === 'snippet') ? ' [snippet]' : '';
        const body = group.map(b => b.text).join('\n\n');
        return `[${position + 1}] ${heading}${structuredTag}${snippetTag}\nURL: ${url}\n\n${body}`;
    });
    return sections.join('\n\n---\n\n');
}