@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,558 @@
1
+ /**
2
+ * Autonomous web research agent
3
+ * Searches the web, fetches pages, and extracts structured data based on natural language prompts
4
+ *
5
+ * Supports:
6
+ * - depth: "basic" (1 search, top 3) vs "thorough" (multi-step, up to 3 searches, top 10)
7
+ * - maxSources: control how many sources to include (default 5, max 20)
8
+ * - topic: "general" | "news" | "technical" | "academic" — adjusts queries & prioritization
9
+ * - outputSchema: JSON Schema for structured output with validation
10
+ * - streaming callbacks for SSE support
11
+ */
12
+ import { load } from 'cheerio';
13
+ import { peel } from '../index.js';
14
+ import { createLogger } from './logger.js';
15
+ const log = createLogger('agent');
16
+ // ---------------------------------------------------------------------------
17
+ // Helpers
18
+ // ---------------------------------------------------------------------------
19
/**
 * Turn the raw `href` of a DuckDuckGo HTML result into the destination URL.
 *
 * Two transformations are applied, in this order:
 *  1. a protocol-relative href (`//host/...`) gets an explicit `https:` scheme;
 *  2. a redirect href carrying the real target in a `uddg=` query parameter is
 *     unwrapped and percent-decoded.
 *
 * The redirect check is applied to every href, including protocol-relative
 * ones. Checking the `//` prefix first and stopping there would leave such
 * redirect links wrapped, so every result would share the same redirect
 * host and path.
 *
 * @param rawUrl the href attribute exactly as found in the result markup
 * @returns the resolved destination URL
 * @throws {URIError} when the `uddg` value is not valid percent-encoding
 */
function resolveDuckDuckGoUrl(rawUrl) {
    const absolute = rawUrl.startsWith('//') ? `https:${rawUrl}` : rawUrl;
    if (!absolute.includes('uddg=')) {
        return absolute;
    }
    return decodeURIComponent(absolute.split('uddg=')[1].split('&')[0]);
}
/**
 * Search DuckDuckGo's HTML endpoint and parse the result list.
 *
 * Best-effort: any network, HTTP or parsing failure is logged and yields an
 * empty array instead of throwing.
 *
 * @param query free-text search query (URL-encoded here)
 * @param limit maximum number of results to return (default 10)
 * @returns array of `{ url, title, snippet }` objects, at most `limit` long
 */
async function searchWeb(query, limit = 10) {
    const { fetch: undiciFetch } = await import('undici');
    const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
    try {
        const response = await undiciFetch(url, {
            headers: {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
            },
        });
        if (!response.ok) {
            // An error page contains no result markup; report the status instead of parsing it.
            log.error('Search failed:', `HTTP ${response.status}`);
            return [];
        }
        const $ = load(await response.text());
        const results = [];
        $('.result').each((_, el) => {
            const link = $(el).find('.result__a');
            const rawUrl = link.attr('href');
            const title = link.text().trim();
            const snippet = $(el).find('.result__snippet').text().trim();
            // Entries without a link or a title are not usable results.
            if (!rawUrl || !title) {
                return;
            }
            try {
                results.push({ url: resolveDuckDuckGoUrl(rawUrl), title, snippet });
            }
            catch (e) {
                // A single malformed redirect must not discard the other results.
                log.debug('url decode failed:', e instanceof Error ? e.message : e);
            }
        });
        return results.slice(0, limit);
    }
    catch (error) {
        log.error('Search failed:', error);
        return [];
    }
}
62
/**
 * Score a search result for a research topic from its hostname alone.
 *
 * The result URL is lower-cased and parsed; an unparsable URL is treated as
 * having an empty hostname. Each known topic has an ordered list of hostname
 * patterns, and the score of the first matching pattern is returned.
 *
 * @param result search result; only `result.url` is read
 * @param topic 'academic' | 'technical' | 'news' (anything else scores 0)
 * @returns 10 for a preferred host, 5 or 3 for a secondary match, otherwise 0
 */
function scoreByTopic(result, topic) {
    const loweredUrl = result.url.toLowerCase();
    let host = '';
    try {
        host = new URL(loweredUrl).hostname;
    }
    catch {
        host = '';
    }
    // Ordered tiers per topic: the first pattern that matches decides the score.
    const tiersByTopic = new Map([
        ['academic', [
                { pattern: /\.edu$|arxiv\.org|scholar\.google|pubmed|ieee\.org|acm\.org|researchgate\.net/, score: 10 },
                { pattern: /\.gov$/, score: 5 },
            ]],
        ['technical', [
                { pattern: /github\.com|stackoverflow\.com|docs\.|developer\.|devdocs\.io|mdn\./, score: 10 },
                { pattern: /\.dev$|\.io$/, score: 3 },
            ]],
        ['news', [
                { pattern: /reuters\.com|apnews\.com|bbc\.com|cnn\.com|nytimes\.com|theguardian\.com|bloomberg\.com|techcrunch\.com|theverge\.com|arstechnica\.com/, score: 10 },
                { pattern: /news|press|blog/, score: 3 },
            ]],
    ]);
    const tiers = tiersByTopic.get(topic);
    if (!tiers) {
        return 0;
    }
    for (const tier of tiers) {
        if (tier.pattern.test(host)) {
            return tier.score;
        }
    }
    return 0;
}
96
/**
 * Append topic-specific keywords to a search query.
 *
 * - 'news'      → adds "latest news" plus the current calendar year, so the
 *                 recency hint does not go stale as a hard-coded year would
 * - 'academic'  → adds "research paper study"
 * - 'technical' → adds "documentation tutorial"
 * - anything else returns the query unchanged
 *
 * @param query the base search query
 * @param topic research topic selector
 * @returns the query with the topic suffix appended (or unchanged)
 */
function enhanceQueryForTopic(query, topic) {
    switch (topic) {
        case 'news':
            return `${query} latest news ${new Date().getFullYear()}`;
        case 'academic':
            return `${query} research paper study`;
        case 'technical':
            return `${query} documentation tutorial`;
        default:
            return query;
    }
}
111
/**
 * Send a single non-streaming chat-completions request to an
 * OpenAI-compatible endpoint.
 *
 * @param messages chat messages forwarded unchanged as the `messages` field
 * @param options `{ apiKey, model?, baseUrl?, jsonMode? }`; `model` defaults to
 *   'gpt-4o-mini' and `baseUrl` to 'https://api.openai.com/v1'. When `jsonMode`
 *   is truthy the request asks for a `json_object` response format.
 * @returns `{ content, usage }` where `usage` is `{ input, output }` taken from
 *   the response's prompt/completion token counts (0 when not reported)
 * @throws Error on a non-OK HTTP status or when the first choice has no content
 */
async function callLLM(messages, options) {
    const { apiKey, model = 'gpt-4o-mini', baseUrl = 'https://api.openai.com/v1', jsonMode } = options;
    const { fetch: httpFetch } = await import('undici');
    const payload = {
        model,
        messages,
        temperature: 0,
        // Only present when JSON mode is requested; stays the last key of the payload.
        ...(jsonMode ? { response_format: { type: 'json_object' } } : {}),
    };
    const response = await httpFetch(`${baseUrl}/chat/completions`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(payload),
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`LLM API error ${response.status}: ${errorText}`);
    }
    const result = await response.json();
    const content = result.choices?.[0]?.message?.content;
    if (!content) {
        throw new Error('LLM returned empty response');
    }
    const reported = result.usage;
    return {
        content,
        usage: {
            input: reported?.prompt_tokens ?? 0,
            output: reported?.completion_tokens ?? 0,
        },
    };
}
147
/**
 * Apply one line of a server-sent-event stream to the running accumulator.
 *
 * Only `data:` lines are considered (with or without a space after the
 * colon); blank lines, comments and the `[DONE]` sentinel are ignored. A text
 * delta is appended to `state.content` and passed to `onChunk`; a `usage`
 * object, when present, replaces `state.usage`. Unparsable payloads are
 * logged at debug level and skipped so one bad chunk does not abort the stream.
 *
 * @param line one raw line of the stream (may carry a trailing `\r`)
 * @param state mutable accumulator `{ content, usage: { input, output } }`
 * @param onChunk callback invoked with each text delta
 */
function consumeStreamLine(line, state, onChunk) {
    const trimmed = line.trim();
    if (!trimmed.startsWith('data:')) {
        return;
    }
    const data = trimmed.slice(5).trim();
    if (!data || data === '[DONE]') {
        return;
    }
    try {
        const parsed = JSON.parse(data);
        const delta = parsed.choices?.[0]?.delta?.content;
        if (delta) {
            state.content += delta;
            onChunk(delta);
        }
        // The final chunk may carry token usage (requested via stream_options).
        if (parsed.usage) {
            state.usage = {
                input: parsed.usage.prompt_tokens ?? 0,
                output: parsed.usage.completion_tokens ?? 0,
            };
        }
    }
    catch (e) {
        log.debug('stream chunk parse failed:', e instanceof Error ? e.message : e);
    }
}
/**
 * Call an OpenAI-compatible chat-completions endpoint with streaming enabled.
 *
 * Without an `onChunk` callback this simply delegates to `callLLM`. Otherwise
 * the response is read as a server-sent-event stream: `onChunk` receives each
 * text delta as it arrives and the concatenated text is returned at the end.
 *
 * @param messages chat messages forwarded unchanged as the `messages` field
 * @param options `{ apiKey, model?, baseUrl?, jsonMode? }` (same defaults as `callLLM`)
 * @param onChunk optional callback invoked with each text delta
 * @returns `{ content, usage }` with the full text and `{ input, output }` token counts
 * @throws Error on a non-OK HTTP status
 */
async function callLLMStreaming(messages, options, onChunk) {
    if (!onChunk)
        return callLLM(messages, options);
    const { apiKey, model = 'gpt-4o-mini', baseUrl = 'https://api.openai.com/v1', jsonMode } = options;
    const { fetch: undiciFetch } = await import('undici');
    const body = {
        model,
        messages,
        temperature: 0,
        stream: true,
        stream_options: { include_usage: true },
        ...(jsonMode ? { response_format: { type: 'json_object' } } : {}),
    };
    const response = await undiciFetch(`${baseUrl}/chat/completions`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`LLM API error ${response.status}: ${errorText}`);
    }
    const state = { content: '', usage: { input: 0, output: 0 } };
    const reader = response.body?.getReader?.();
    if (!reader) {
        // No readable stream available: read the whole body, but still parse it
        // as an event stream so the caller gets the text rather than raw frames.
        const text = await response.text();
        for (const line of text.split('\n')) {
            consumeStreamLine(line, state, onChunk);
        }
        return { content: state.content, usage: state.usage };
    }
    const decoder = new TextDecoder();
    let buffer = '';
    while (true) {
        const { done, value } = await reader.read();
        if (done)
            break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next read.
        buffer = lines.pop() ?? '';
        for (const line of lines) {
            consumeStreamLine(line, state, onChunk);
        }
    }
    // Flush the decoder and handle a final line that had no trailing newline.
    buffer += decoder.decode();
    if (buffer) {
        consumeStreamLine(buffer, state, onChunk);
    }
    return { content: state.content, usage: state.usage };
}
225
/**
 * Best-effort check of parsed JSON against a JSON Schema, without extra
 * dependencies. Only the top level is inspected:
 *
 * - `type: 'object'` → data must be a non-null, non-array object, and every
 *   name listed in `required` must be an own property of it. `required` is
 *   honoured whether or not the schema also declares `properties`.
 * - `type: 'array'`  → data must be an array.
 * - any other schema (or a non-object schema) is accepted as valid.
 *
 * @param data the parsed value to check
 * @param schema the JSON Schema object
 * @returns `{ valid: true }` or `{ valid: false, errors }` with a readable message
 */
function validateJsonSchema(data, schema) {
    if (typeof schema !== 'object' || schema === null) {
        // Nothing to validate against.
        return { valid: true };
    }
    if (schema.type === 'object') {
        if (typeof data !== 'object' || data === null || Array.isArray(data)) {
            return { valid: false, errors: 'Expected an object' };
        }
        if (Array.isArray(schema.required)) {
            // Own-property check: inherited names such as "toString" must not count as present.
            const missing = schema.required.filter((key) => !Object.prototype.hasOwnProperty.call(data, key));
            if (missing.length > 0) {
                return { valid: false, errors: `Missing required fields: ${missing.join(', ')}` };
            }
        }
    }
    else if (schema.type === 'array') {
        if (!Array.isArray(data)) {
            return { valid: false, errors: 'Expected an array' };
        }
    }
    return { valid: true };
}
248
/**
 * Cap text at roughly `maxTokens` tokens, using the rule of thumb that one
 * token is about four characters. Text within the budget is returned as-is;
 * longer text is cut at the budget and a truncation marker is appended.
 *
 * @param content the text to cap
 * @param maxTokens approximate token budget (default 3000)
 * @returns the original text, or its prefix followed by the marker
 */
function truncateContent(content, maxTokens = 3000) {
    const charBudget = maxTokens * 4;
    return content.length <= charBudget
        ? content
        : content.slice(0, charBudget) + '\n\n[Content truncated...]';
}
257
+ // ---------------------------------------------------------------------------
258
+ // Main agent
259
+ // ---------------------------------------------------------------------------
260
/**
 * Pull the list of search queries out of an LLM JSON reply of the form
 * `{"queries": [...]}`. Anything that is not a non-blank string is dropped,
 * and a missing or non-array `queries` field yields an empty list.
 *
 * @param rawJson the LLM reply text
 * @returns the usable query strings (possibly empty)
 * @throws {SyntaxError} when `rawJson` is not valid JSON
 */
function extractQueries(rawJson) {
    const parsed = JSON.parse(rawJson);
    const candidates = Array.isArray(parsed?.queries) ? parsed.queries : [];
    return candidates.filter((q) => typeof q === 'string' && q.trim() !== '');
}
/**
 * Keep the first URL for each hostname+pathname pair, preserving order.
 * Strings that cannot be parsed as URLs are dropped.
 *
 * @param urls candidate URLs
 * @returns the de-duplicated list
 */
function dedupeByHostAndPath(urls) {
    const seen = new Set();
    return urls.filter((candidate) => {
        let key;
        try {
            const parsedUrl = new URL(candidate);
            key = parsedUrl.hostname + parsedUrl.pathname;
        }
        catch {
            return false;
        }
        if (seen.has(key))
            return false;
        seen.add(key);
        return true;
    });
}
/**
 * Run the autonomous web research agent.
 *
 * Steps:
 *  1. If no start URLs are given, ask the LLM for search queries, run them
 *     through `searchWeb`, optionally rank results by topic, and de-duplicate.
 *  2. Fetch pages with `peel` until enough sources are collected or the
 *     credit limit is reached.
 *  2b. For `depth: 'thorough'` only, ask the LLM for follow-up queries when
 *     fewer sources than requested were collected.
 *  3. Ask the LLM to synthesise an answer (streamed when `onEvent` is set),
 *     then check it against `outputSchema` with one repair attempt.
 *
 * Options read: `prompt` (required), `llmApiKey` (required), `urls`,
 * `schema` (legacy), `outputSchema`, `llmApiBase`, `llmModel`, `maxPages`
 * (legacy), `maxSources` (capped at 20), `depth`, `topic`, `maxCredits`,
 * `onProgress`, `onEvent`.
 *
 * Missing `llmApiKey` or `prompt` rejects the returned promise. Any later
 * failure is logged and reported as `{ success: false, data: { error } }`
 * together with whatever was collected up to that point.
 *
 * @param options agent configuration, see above
 * @returns `{ success, data, answer?, sources, sourcesDetailed, pagesVisited,
 *   creditsUsed, tokensUsed }`
 */
export async function runAgent(options) {
    const { prompt, urls: startUrls = [], schema: legacySchema, outputSchema, llmApiKey, llmApiBase = 'https://api.openai.com/v1', llmModel = 'gpt-4o-mini', maxPages, maxSources: rawMaxSources, depth = 'basic', topic = 'general', maxCredits, onProgress, onEvent, } = options;
    if (!llmApiKey)
        throw new Error('llmApiKey is required');
    if (!prompt)
        throw new Error('prompt is required');
    // Effective schema = outputSchema || legacy schema
    const effectiveSchema = outputSchema || legacySchema;
    // Effective maxSources: new param > legacy maxPages > depth-based default
    const depthDefaults = depth === 'thorough'
        ? { maxSources: 10, maxQueries: 3, resultsPerQuery: 10 }
        : { maxSources: 3, maxQueries: 1, resultsPerQuery: 5 };
    const maxSourcesLimit = Math.min(rawMaxSources ?? maxPages ?? depthDefaults.maxSources, 20);
    const maxQueries = depthDefaults.maxQueries;
    const perPageTokens = depth === 'thorough' ? 4000 : 3000;
    const llmOptions = { apiKey: llmApiKey, model: llmModel, baseUrl: llmApiBase, jsonMode: true };
    const visitedUrls = new Set();
    const sources = [];
    const sourcesDetailed = [];
    let pagesVisited = 0;
    let creditsUsed = 0;
    const totalUsage = { input: 0, output: 0 };
    const collectedData = [];
    // Legacy progress callback
    const reportProgress = (status, message, currentUrl) => {
        if (onProgress) {
            onProgress({ status, currentUrl, pagesVisited, message });
        }
    };
    // Structured event callback (used for streaming)
    const emit = (event) => {
        if (onEvent)
            onEvent(event);
    };
    const accUsage = (u) => {
        totalUsage.input += u.input;
        totalUsage.output += u.output;
    };
    // Fetch one page and record it as a source; throws when the fetch fails.
    const collectPage = async (url) => {
        const page = await peel(url, { format: 'markdown', timeout: 15000 });
        pagesVisited++;
        creditsUsed++;
        collectedData.push({ url: page.url, title: page.title, content: truncateContent(page.content, perPageTokens) });
        sources.push(page.url);
        sourcesDetailed.push({ url: page.url, title: page.title });
        return page;
    };
    try {
        // -----------------------------------------------------------------------
        // Step 1: Determine search strategy & collect URLs
        // -----------------------------------------------------------------------
        let urlsToVisit = [...startUrls];
        if (urlsToVisit.length === 0) {
            reportProgress('searching', 'Planning research strategy...');
            const queryCount = depth === 'thorough' ? '3-5' : '2-3';
            const topicHint = topic !== 'general'
                ? `\nFocus queries on ${topic} sources.`
                : '';
            const planningMessages = [
                {
                    role: 'system',
                    content: `You are a web research assistant. Generate ${queryCount} specific search queries to find information for the user's request.${topicHint}\nReturn JSON only: {"queries": ["query1", "query2", ...]}`,
                },
                { role: 'user', content: `Research request: ${prompt}` },
            ];
            const planResponse = await callLLM(planningMessages, llmOptions);
            creditsUsed++;
            accUsage(planResponse.usage);
            let queries;
            try {
                queries = extractQueries(planResponse.content);
            }
            catch {
                queries = [];
            }
            // An unusable plan (bad JSON, wrong shape, empty list) falls back to
            // searching for the prompt itself rather than searching for nothing.
            if (queries.length === 0) {
                queries = [prompt];
            }
            for (const rawQuery of queries.slice(0, maxQueries)) {
                const query = topic !== 'general' ? enhanceQueryForTopic(rawQuery, topic) : rawQuery;
                reportProgress('searching', `Searching: ${query}`);
                emit({ type: 'step', action: 'searching', query });
                const results = await searchWeb(query, depthDefaults.resultsPerQuery);
                // Sort by topic relevance
                if (topic !== 'general') {
                    results.sort((a, b) => scoreByTopic(b, topic) - scoreByTopic(a, topic));
                }
                urlsToVisit.push(...results.map(r => r.url));
                if (urlsToVisit.length >= maxSourcesLimit * 2)
                    break; // fetch a bit more than needed to account for failures
            }
            urlsToVisit = dedupeByHostAndPath(urlsToVisit);
        }
        // -----------------------------------------------------------------------
        // Step 2: Visit pages and collect data
        // -----------------------------------------------------------------------
        const maxToFetch = Math.min(urlsToVisit.length, maxSourcesLimit);
        // A few spare candidates are allowed so failed fetches can be replaced.
        for (const url of urlsToVisit.slice(0, maxToFetch + 5)) {
            if (collectedData.length >= maxSourcesLimit)
                break;
            if (maxCredits && creditsUsed >= maxCredits) {
                reportProgress('done', 'Credit limit reached');
                break;
            }
            if (visitedUrls.has(url))
                continue;
            visitedUrls.add(url);
            reportProgress('visiting', `Fetching: ${url}`, url);
            emit({ type: 'step', action: 'fetching', url });
            try {
                const page = await collectPage(url);
                reportProgress('visiting', `Fetched: ${page.title}`, url);
            }
            catch (error) {
                log.error(`Failed to fetch ${url}:`, error instanceof Error ? error.message : error);
            }
        }
        // -----------------------------------------------------------------------
        // Step 2b (thorough only): Cross-reference — ask LLM if more info needed
        // -----------------------------------------------------------------------
        if (depth === 'thorough' && collectedData.length > 0 && collectedData.length < maxSourcesLimit) {
            reportProgress('searching', 'Cross-referencing — checking for gaps...');
            emit({ type: 'step', action: 'analyzing', summary: 'Cross-referencing collected data for gaps...' });
            const gapMessages = [
                {
                    role: 'system',
                    content: 'You are a web research assistant. Given the user\'s research request and summaries of pages already visited, identify any gaps. If more searches would help, return JSON: {"queries":["q1"]}. If no gaps, return {"queries":[]}.',
                },
                {
                    role: 'user',
                    content: `Research request: ${prompt}\n\nPages visited:\n${collectedData.map(d => `- ${d.title} (${d.url})`).join('\n')}`,
                },
            ];
            try {
                const gapResponse = await callLLM(gapMessages, llmOptions);
                creditsUsed++;
                accUsage(gapResponse.usage);
                const gapQueries = extractQueries(gapResponse.content).slice(0, 2);
                for (const q of gapQueries) {
                    emit({ type: 'step', action: 'searching', query: q });
                    const results = await searchWeb(q, 5);
                    for (const r of results) {
                        if (collectedData.length >= maxSourcesLimit)
                            break;
                        if (visitedUrls.has(r.url))
                            continue;
                        visitedUrls.add(r.url);
                        emit({ type: 'step', action: 'fetching', url: r.url });
                        try {
                            await collectPage(r.url);
                        }
                        catch (e) {
                            log.debug('page fetch failed:', e instanceof Error ? e.message : e);
                        }
                    }
                }
            }
            catch (e) {
                // Gap analysis is optional; continue with what was collected.
                log.debug('research batch failed:', e instanceof Error ? e.message : e);
            }
        }
        // -----------------------------------------------------------------------
        // Step 3: Extract / synthesise final answer
        // -----------------------------------------------------------------------
        if (collectedData.length === 0) {
            return {
                success: false,
                data: { error: 'No data could be collected from the web' },
                sources: [],
                sourcesDetailed: [],
                pagesVisited,
                creditsUsed,
                tokensUsed: totalUsage,
            };
        }
        reportProgress('extracting', 'Analyzing collected data...');
        emit({ type: 'step', action: 'analyzing', summary: `Synthesizing answer from ${collectedData.length} sources...` });
        const context = collectedData
            .map(d => `Source: ${d.url}\nTitle: ${d.title}\n\n${d.content}`)
            .join('\n\n---\n\n');
        const truncatedContext = truncateContent(context, depth === 'thorough' ? 12000 : 8000);
        // Build system prompt based on schema or free-form
        let systemPrompt;
        if (effectiveSchema) {
            systemPrompt =
                'You are a web research assistant. Extract structured data from the provided web content based on the user\'s request. ' +
                    `Return a JSON object matching this schema:\n${JSON.stringify(effectiveSchema, null, 2)}\n\nReturn ONLY valid JSON, no explanation.`;
        }
        else {
            systemPrompt =
                'You are a web research assistant. Based on the provided web content, answer the user\'s research question. ' +
                    'Provide a comprehensive, well-structured answer. Return a JSON object with:\n' +
                    '- "answer": your detailed answer as a string (use markdown formatting)\n' +
                    '- "keyFindings": array of key facts/findings\n' +
                    'Return ONLY valid JSON, no explanation.';
        }
        const extractMessages = [
            { role: 'system', content: systemPrompt },
            {
                role: 'user',
                content: `Research request: ${prompt}\n\nCollected data from ${collectedData.length} web pages:\n\n${truncatedContext}`,
            },
        ];
        // Use streaming LLM call when onEvent is present
        const hasStreaming = !!onEvent;
        const extractResponse = await callLLMStreaming(extractMessages, llmOptions, hasStreaming ? (text) => emit({ type: 'chunk', text }) : undefined);
        creditsUsed++;
        accUsage(extractResponse.usage);
        // Parse final result; non-JSON output is wrapped so callers always get an object
        let finalData;
        try {
            finalData = JSON.parse(extractResponse.content);
        }
        catch {
            finalData = { result: extractResponse.content };
        }
        // Validate against outputSchema if provided
        if (outputSchema) {
            let validation = validateJsonSchema(finalData, outputSchema);
            if (!validation.valid) {
                // Try once more: ask LLM to fix, then re-check the repaired output
                try {
                    const fixMessages = [
                        {
                            role: 'system',
                            content: `The previous response did not match the required JSON schema. Fix it.\nSchema: ${JSON.stringify(outputSchema)}\nErrors: ${validation.errors}\nReturn ONLY valid JSON.`,
                        },
                        { role: 'user', content: extractResponse.content },
                    ];
                    const fixResponse = await callLLM(fixMessages, llmOptions);
                    creditsUsed++;
                    accUsage(fixResponse.usage);
                    finalData = JSON.parse(fixResponse.content);
                    validation = validateJsonSchema(finalData, outputSchema);
                }
                catch (e) {
                    log.debug('schema fix attempt failed:', e instanceof Error ? e.message : e);
                }
                if (!validation.valid) {
                    // Return what we have with a warning. A null or primitive value
                    // cannot carry a property, so wrap it first.
                    if (typeof finalData !== 'object' || finalData === null) {
                        finalData = { result: finalData };
                    }
                    finalData._validationWarning = `Output did not match schema: ${validation.errors}`;
                }
            }
        }
        const answerText = typeof finalData?.answer === 'string' ? finalData.answer : undefined;
        reportProgress('done', `Completed: ${pagesVisited} pages visited`);
        emit({
            type: 'done',
            answer: answerText || JSON.stringify(finalData),
            sources: sourcesDetailed,
            tokensUsed: totalUsage,
        });
        return {
            success: true,
            data: finalData,
            answer: answerText,
            sources,
            sourcesDetailed,
            pagesVisited,
            creditsUsed,
            tokensUsed: totalUsage,
        };
    }
    catch (error) {
        log.error('Agent error:', error);
        return {
            success: false,
            // `error` may be any thrown value, including null or a string.
            data: { error: error?.message || 'Unknown error occurred' },
            sources,
            sourcesDetailed,
            pagesVisited,
            creditsUsed,
            tokensUsed: totalUsage,
        };
    }
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * /answer core implementation
3
+ *
4
+ * Flow:
5
+ * - search the web
6
+ * - fetch top sources via WebPeel
7
+ * - call an LLM (BYOK) to generate a cited answer
8
+ */
9
+ import { type SearchProviderId } from './search-provider.js';
10
+ export type LLMProviderId = 'openai' | 'anthropic' | 'google'; // LLM provider identifiers: the type of AnswerRequest.llmProvider and AnswerResponse.llmProvider.
11
+ export interface TokensUsed { // Token counts; this is the type of AnswerResponse.tokensUsed.
12
+ input: number; // presumably tokens sent to the LLM — confirm against the implementation
13
+ output: number; // presumably tokens generated by the LLM — confirm against the implementation
14
+ }
15
+ export interface AnswerCitation { // One element of AnswerResponse.citations.
16
+ title: string; // title of the cited source
17
+ url: string; // URL of the cited source
18
+ snippet: string; // NOTE(review): presumably an excerpt of the source text — confirm where it is populated
19
+ }
20
+ export interface AnswerRequest { // Input accepted by answerQuestion().
21
+ question: string; // the question to answer
22
+ searchProvider?: SearchProviderId; // optional; the default is not visible in this declaration file
23
+ searchApiKey?: string; // optional; presumably the key for the chosen search provider — confirm
24
+ llmProvider: LLMProviderId; // required: which LLM backend to call
25
+ llmApiKey: string; // required: caller-supplied key (the file header describes the LLM call as BYOK)
26
+ llmModel?: string; // optional; AnswerResponse.llmModel is always a string
27
+ maxSources?: number; // optional; presumably caps how many sources are fetched — confirm default and bounds
28
+ stream?: boolean; // optional; onChunk below is documented as used when stream=true
29
+ /** Called with incremental text when stream=true */
30
+ onChunk?: (text: string) => void;
31
+ /** Optional AbortSignal */
32
+ signal?: AbortSignal;
33
+ }
34
+ export interface AnswerResponse { // Resolved value of the promise returned by answerQuestion().
35
+ answer: string; // the generated answer text (the file header calls it a cited answer)
36
+ citations: AnswerCitation[]; // sources referenced by the answer
37
+ searchProvider: SearchProviderId; // always present here although optional on the request
38
+ llmProvider: LLMProviderId; // same type as AnswerRequest.llmProvider
39
+ llmModel: string; // always present here although optional on the request
40
+ tokensUsed: TokensUsed; // token counts for the request
41
+ }
42
+ export declare function answerQuestion(req: AnswerRequest): Promise<AnswerResponse>; // Entry point of the /answer flow in the file header: search the web, fetch top sources via WebPeel, call an LLM for a cited answer.