@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,720 @@
1
+ /**
2
+ * Challenge / bot-protection solver.
3
+ *
4
+ * Attempts to bypass bot-protection challenges using free, in-process methods:
5
+ * 1. Cloudflare JS challenge — render in stealth Playwright, wait for auto-solve
6
+ * 2. hCaptcha — accessibility bypass (TODO: implement if API is confirmed available)
7
+ *
8
+ * Architecture note:
9
+ * Browser-based solving is CPU/RAM intensive. When the env var BROWSER_WORKER_URL
10
+ * is set, the solve request is proxied to an external worker (e.g. Hetzner 4GB VM)
11
+ * instead of running locally. This keeps the main Render container (512 MB) lean.
12
+ *
13
+ * Usage:
14
+ * const result = await solveChallenge(url, 'cloudflare', html);
15
+ * if (result.solved) {
16
+ * // result.html = real page content
17
+ * // result.cookies = ["cf_clearance=...", ...]
18
+ * }
19
+ */
20
+ import { cacheCookiesForUrl } from '../core/cookie-cache.js';
21
+ import { createLogger } from '../core/logger.js';
22
+ const log = createLogger('challenge-solver');
// ── Image CAPTCHA solver constants ────────────────────────────────────────────
// SECURITY(review): the endpoint address and bearer token below are embedded in a
// published package. They are kept only as fallbacks so existing deployments keep
// working; rotate the token and supply OLLAMA_VISION_URL / OLLAMA_AUTH_TOKEN /
// OLLAMA_VISION_MODEL through the environment instead.
// `||` (not `??`) is deliberate: an empty env var falls back to the default.
/** Generate endpoint of the vision model server. */
const OLLAMA_VISION_URL = process.env.OLLAMA_VISION_URL || 'http://178.156.229.86:11435/api/generate';
/** Bearer token sent in the Authorization header of every vision request. */
const OLLAMA_AUTH_TOKEN = process.env.OLLAMA_AUTH_TOKEN || 'c996233de4addb47e4cdec8bc5ff8776397f813ca7bd444e7258e0e2ed251963';
/** Model name passed in the request body. */
const OLLAMA_VISION_MODEL = process.env.OLLAMA_VISION_MODEL || 'moondream';
/** moondream on the 4GB Hetzner VPS takes ~30s per image */
const VISION_TIMEOUT_MS = 45_000;
/** Maximum number of screenshot → vision → click → verify rounds per CAPTCHA. */
const IMAGE_CAPTCHA_MAX_ROUNDS = 3;
/** Grid element selectors to try (reCAPTCHA, hCaptcha, generic) */
const CAPTCHA_GRID_SELECTORS = [
    '.rc-imageselect-table',
    '.task-grid',
    '.task-image',
    'table.rc-imageselect-table',
    '[class*="grid"]:not(body):not(html)',
    '.captcha-grid',
];
/** Verify/Submit button selectors */
const CAPTCHA_VERIFY_SELECTORS = [
    '#recaptcha-verify-button',
    'button[data-action="verify"]',
    'button[class*="verify"]',
    'button[class*="submit"]',
    '.rc-button-default',
    '[id*="verify"]',
    '[class*="verify"]',
];
/** Instruction text containers to extract the target object from */
const CAPTCHA_INSTRUCTION_SELECTORS = [
    '.rc-imageselect-desc-wrapper',
    '.rc-imageselect-desc',
    '.prompt-text',
    '[class*="prompt"]',
    '[class*="instruction"]',
    '[class*="task-desc"]',
    '[aria-label*="select"]',
    '[aria-label*="click"]',
];
/**
 * Patterns to extract the object name from instruction text.
 * Capture group 1 is the object; matching stops at the first "." or end of text.
 */
const CAPTCHA_OBJECT_PATTERNS = [
    /select all (?:images|squares|tiles) (?:with|containing|that (?:have|contain)) (?:a |an )?(.+?)(?:\.|$)/i,
    /click (?:all )?(?:images|squares|tiles) (?:containing|with|that (?:have|contain)) (?:a |an )?(.+?)(?:\.|$)/i,
    /please click each image containing (?:a |an )?(.+?)(?:\.|$)/i,
    /select all (?:the )?(?:image|picture)s? of (?:a |an )?(.+?)(?:\.|$)/i,
    /identify all (?:images|pictures|squares) (?:with|showing|of) (?:a |an )?(.+?)(?:\.|$)/i,
];
// ── Vision API call ───────────────────────────────────────────────────────────
/**
 * Ask the vision model which grid cells contain the target object.
 *
 * The request is aborted after VISION_TIMEOUT_MS; the timer stays armed until
 * the response body has been read, so a stalled body is covered as well.
 *
 * @param {string} base64Image  Base64-encoded screenshot of the CAPTCHA grid.
 * @param {string} targetObject Object name taken from the CAPTCHA instructions.
 * @returns {Promise<number[] | null>} Distinct 1-indexed grid positions (1–9) in the
 *   order the model listed them, or null if the call fails or no position is found.
 */
export async function askVisionModel(base64Image, targetObject) {
    const prompt = `This is a 3x3 image grid CAPTCHA. Select all squares containing "${targetObject}". Reply with ONLY the grid positions as numbers 1-9 (left to right, top to bottom), separated by commas. Example: 1,3,7`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), VISION_TIMEOUT_MS);
    try {
        const response = await fetch(OLLAMA_VISION_URL, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${OLLAMA_AUTH_TOKEN}`,
            },
            body: JSON.stringify({
                model: OLLAMA_VISION_MODEL,
                prompt,
                images: [base64Image],
                stream: false,
                options: { num_predict: 50, temperature: 0.1 },
            }),
            signal: controller.signal,
        });
        if (!response.ok) {
            log.debug(`Vision API returned HTTP ${response.status}`);
            return null;
        }
        const data = await response.json();
        const text = typeof data?.response === 'string' ? data.response : '';
        log.debug(`Vision model response: "${text}"`);
        // Match whole numbers only (not individual digits from multi-digit numbers like 10, 11).
        // Duplicates are dropped: clicking the same tile twice would deselect it.
        const positions = [...new Set(text.match(/\b[1-9]\b/g)?.map(Number) ?? [])];
        if (positions.length === 0) {
            log.debug('Vision model returned no valid grid positions');
            return null;
        }
        return positions;
    }
    catch (err) {
        log.debug('Vision model call failed:', err instanceof Error ? err.message : String(err));
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
// ── Target object extraction ──────────────────────────────────────────────────
/**
 * Look for image-grid CAPTCHA instructions on the page and pull out the object
 * the user is asked to find.
 *
 * Each instruction selector is tried in order; only the first element matching a
 * selector is read. Its text is whitespace-collapsed and tested against every
 * object pattern. Any failure while probing a selector moves on to the next one.
 *
 * @param page Playwright-style page exposing `$()`.
 * @returns {Promise<string | null>} Lower-cased object name (e.g. "traffic lights"),
 *   or null when no instruction text matches.
 */
export async function detectImageCaptchaTarget(page) {
    for (const selector of CAPTCHA_INSTRUCTION_SELECTORS) {
        try {
            const handle = await page.$(selector);
            const rawText = handle ? await handle.innerText().catch(() => '') : '';
            if (!rawText) {
                continue;
            }
            const collapsed = rawText.trim().replace(/\s+/g, ' ');
            for (const pattern of CAPTCHA_OBJECT_PATTERNS) {
                const [, captured] = collapsed.match(pattern) ?? [];
                if (!captured) {
                    continue;
                }
                const target = captured.trim().toLowerCase();
                log.debug(`Detected image CAPTCHA target: "${target}" from selector ${selector}`);
                return target;
            }
        }
        catch {
            // Probe failed for this selector — fall through to the next one
        }
    }
    return null;
}
// ── Image CAPTCHA solver ──────────────────────────────────────────────────────
/**
 * Solve an image grid CAPTCHA using the moondream vision model.
 *
 * Flow per round:
 * 1. Screenshot the CAPTCHA grid element (or the viewport if no grid is found)
 * 2. Send to moondream → get grid positions
 * 3. Click each identified cell once
 * 4. Click Verify button
 * 5. Check if solved; if a new round appears, repeat (max IMAGE_CAPTCHA_MAX_ROUNDS rounds)
 *
 * Runs only when ENABLE_LOCAL_CHALLENGE_SOLVE is 'true' or BROWSER_WORKER_URL is set.
 *
 * @param page          Playwright-style page currently showing the CAPTCHA.
 * @param targetObject  Object name to look for in the first round.
 * @returns {Promise<{solved: boolean, rounds: number, error?: string}>}
 */
export async function solveImageCaptcha(page, targetObject) {
    // Guard: only run when explicitly enabled or remote worker configured
    const enabled = process.env.ENABLE_LOCAL_CHALLENGE_SOLVE === 'true' || !!process.env.BROWSER_WORKER_URL;
    if (!enabled) {
        return { solved: false, rounds: 0, error: 'Image CAPTCHA solving not enabled (set ENABLE_LOCAL_CHALLENGE_SOLVE=true)' };
    }
    // The target may change between rounds; track it locally instead of reassigning the parameter.
    let currentTarget = targetObject;
    let rounds = 0;
    while (rounds < IMAGE_CAPTCHA_MAX_ROUNDS) {
        rounds++;
        // ── 1. Screenshot the grid element ─────────────────────────────────────
        let base64Screenshot;
        try {
            base64Screenshot = await captureCaptchaScreenshot(page);
        }
        catch (err) {
            return {
                solved: false,
                rounds,
                error: `Screenshot failed: ${err instanceof Error ? err.message : String(err)}`,
            };
        }
        // ── 2. Ask vision model ────────────────────────────────────────────────
        log.debug(`Round ${rounds}: asking moondream to find "${currentTarget}"…`);
        const reported = await askVisionModel(base64Screenshot, currentTarget);
        if (!reported || reported.length === 0) {
            log.debug(`Round ${rounds}: vision model returned no positions — stopping`);
            return { solved: false, rounds, error: 'Vision model returned no valid positions' };
        }
        // A tile clicked twice toggles back to unselected, so click each position once.
        const positions = [...new Set(reported)];
        log.debug(`Round ${rounds}: vision model selected positions: ${positions.join(',')}`);
        // ── 3. Click grid cells ────────────────────────────────────────────────
        let clickedCount = 0;
        for (const pos of positions) {
            if (await clickCaptchaCell(page, pos)) {
                clickedCount++;
            }
        }
        log.debug(`Round ${rounds}: clicked ${clickedCount}/${positions.length} cells`);
        // Short delay before verify (let animation/state settle)
        await page.waitForTimeout(500);
        // ── 4. Click Verify button ─────────────────────────────────────────────
        const verifyClicked = await clickCaptchaVerifyButton(page, rounds);
        if (!verifyClicked) {
            log.debug(`Round ${rounds}: could not find verify button`);
        }
        // ── 5. Check if solved ─────────────────────────────────────────────────
        await page.waitForTimeout(2000);
        if (await checkCaptchaSolved(page)) {
            log.debug(`Round ${rounds}: CAPTCHA solved!`);
            return { solved: true, rounds };
        }
        // Check if a new round appeared (grid refreshed)
        const newTarget = await detectImageCaptchaTarget(page);
        if (!newTarget) {
            // No more instructions — likely solved or error
            log.debug(`Round ${rounds}: no more instruction text — assuming solved`);
            return { solved: true, rounds };
        }
        currentTarget = newTarget;
        log.debug(`Round ${rounds}: new target for next round: "${currentTarget}"`);
    }
    return { solved: false, rounds, error: `Reached max rounds (${IMAGE_CAPTCHA_MAX_ROUNDS}) without solving` };
}
/**
 * Screenshot the first grid element that can be captured, falling back to a
 * viewport screenshot. Throws only if the fallback screenshot itself fails.
 */
async function captureCaptchaScreenshot(page) {
    for (const selector of CAPTCHA_GRID_SELECTORS) {
        try {
            const gridEl = await page.$(selector);
            if (!gridEl)
                continue;
            const screenshot = await gridEl.screenshot({ type: 'png' });
            log.debug(`Captured CAPTCHA grid with selector: ${selector}`);
            return screenshot.toString('base64');
        }
        catch {
            // Try next selector
        }
    }
    const fullshot = await page.screenshot({ type: 'png' });
    log.debug('Fell back to full-page screenshot for CAPTCHA');
    return fullshot.toString('base64');
}
/**
 * Click the cell at 1-indexed position `pos` in the first grid that can be queried.
 * Returns true when a click was performed, false when no cell exists at that position.
 */
async function clickCaptchaCell(page, pos) {
    for (const gridSelector of CAPTCHA_GRID_SELECTORS) {
        try {
            const gridEl = await page.$(gridSelector);
            if (!gridEl)
                continue;
            let cells = await gridEl.$$('td, div[class*="cell"], div[class*="tile"], div[class*="image"]');
            if (cells.length === 0) {
                // No recognisable cell markup — treat direct children as the cells
                cells = await gridEl.$$(':scope > *');
            }
            const target = cells[pos - 1];
            if (!target) {
                return false;
            }
            await target.click({ timeout: 5000 });
            return true;
        }
        catch {
            // Try next selector
        }
    }
    return false;
}
/** Click the first verify/submit button that accepts a click; returns whether one did. */
async function clickCaptchaVerifyButton(page, rounds) {
    for (const btnSelector of CAPTCHA_VERIFY_SELECTORS) {
        try {
            const btn = await page.$(btnSelector);
            if (btn) {
                await btn.click({ timeout: 3000 });
                log.debug(`Round ${rounds}: clicked verify button (${btnSelector})`);
                return true;
            }
        }
        catch {
            // Try next
        }
    }
    return false;
}
/**
 * Check if the CAPTCHA appears to have been solved (challenge gone, success message, etc.)
 *
 * Decision order:
 * 1. A success-state element is present → solved.
 * 2. A challenge grid is still present → not solved (a refreshed grid means another round).
 * 3. Otherwise, solved unless the page title still mentions "captcha" or "robot".
 *
 * Lookup errors are treated as "element not found"; a failed title read is
 * treated as an empty title.
 *
 * @param page Playwright-style page exposing `$()` and `title()`.
 * @returns {Promise<boolean>}
 */
async function checkCaptchaSolved(page) {
    // Check for reCAPTCHA success state
    try {
        const successEl = await page.$('.recaptcha-checkbox-checked, .rc-anchor-normal-footer, [aria-checked="true"]');
        if (successEl)
            return true;
    }
    catch { /* ignore */ }
    // A visible challenge grid means the CAPTCHA is still being asked, whatever the
    // page title says (embedded CAPTCHAs keep the host page's normal title).
    let gridEl = null;
    try {
        gridEl = await page.$('.rc-imageselect-table, .task-grid');
    }
    catch { /* ignore — fall back to the title check */ }
    if (gridEl)
        return false;
    // Grid gone: only count as solved if we're no longer on a CAPTCHA title page
    const title = (await page.title().catch(() => '')).toLowerCase();
    return !(title.includes('captcha') || title.includes('robot'));
}
// ── Constants ─────────────────────────────────────────────────────────────────
/** Time budget used by solveChallenge when the caller gives no timeout (ms) */
const DEFAULT_TIMEOUT_MS = 15 * 1000;
/** Lower-case title fragments shown while a Cloudflare challenge is unsolved */
const CF_CHALLENGE_TITLES = [
    'just a moment',
    'please wait',
    'one moment, please',
    'checking your browser',
];
/** Elements whose presence marks a Cloudflare challenge page */
const CF_CHALLENGE_SELECTORS = ['#challenge-running', '#challenge-form', '#cf-challenge-running', '.cf-browser-verification'];
// ── Main entry point ──────────────────────────────────────────────────────────
/**
 * Entry point: try to get past a bot-protection challenge.
 *
 * When BROWSER_WORKER_URL is set the request is handed to the remote worker;
 * otherwise the solver matching `challengeType` runs in-process.
 *
 * @param url           Page URL (also used for logging and cookie caching)
 * @param challengeType Challenge kind reported by challenge-detection
 * @param html          Raw challenge HTML, returned unchanged when nothing is solved
 * @param options       Optional `timeout` (ms, defaults to DEFAULT_TIMEOUT_MS) and `proxy`
 * @returns Solve result; `solved: false` results carry an `error` message
 */
export async function solveChallenge(url, challengeType, html, options = {}) {
    const domain = getDomain(url);
    const timeout = options.timeout ?? DEFAULT_TIMEOUT_MS;
    console.log(`[challenge-solver] Attempting ${challengeType} solve for ${domain}`);
    // ── Remote worker proxy (Hetzner) ──────────────────────────────────────────
    const workerUrl = process.env.BROWSER_WORKER_URL;
    if (workerUrl) {
        return solveViaRemoteWorker(url, challengeType, html, { timeout, proxy: options.proxy, workerUrl });
    }
    // ── Local solve ────────────────────────────────────────────────────────────
    if (challengeType === 'cloudflare') {
        return solveCloudflare(url, html, timeout, options.proxy);
    }
    if (challengeType === 'captcha') {
        return solveCaptchaWithVision(url, html, timeout, options.proxy);
    }
    // DataDome and the remaining vendors sometimes clear on their own in a stealth browser
    const stealthTypes = ['datadome', 'akamai', 'perimeterx', 'incapsula', 'generic-block'];
    if (stealthTypes.includes(challengeType)) {
        return solveWithStealthBrowser(url, html, timeout, options.proxy, challengeType);
    }
    if (challengeType === 'empty-shell') {
        // An SPA shell is not a challenge; it should not normally get here
        return { solved: false, html, error: 'empty-shell is not a challenge to solve' };
    }
    return { solved: false, html, error: `Unknown challenge type: ${challengeType}` };
}
// ── Image CAPTCHA orchestrator ────────────────────────────────────────────────
/**
 * Solve an image CAPTCHA by opening a stealth browser, detecting the target
 * object from the page instructions, and calling solveImageCaptcha().
 *
 * The browser context created here is always closed before returning, including
 * when navigation or solving throws. The browser itself comes from browser-pool
 * and is not closed here.
 *
 * @param url       Page URL to open.
 * @param _html     Original challenge HTML, returned when the page content cannot be read.
 * @param timeoutMs Navigation timeout in milliseconds.
 * @param proxy     Optional proxy server passed to the browser context.
 * @returns Solve result; cookies from the context are cached whether or not the solve succeeded.
 */
async function solveCaptchaWithVision(url, _html, timeoutMs, proxy) {
    let ctx = null;
    try {
        const { getStealthBrowser, getRandomUserAgent, getRandomViewport, applyStealthScripts } = await import('../core/browser-pool.js');
        const browser = await getStealthBrowser();
        const vp = getRandomViewport();
        ctx = await browser.newContext({
            userAgent: getRandomUserAgent(),
            viewport: { width: vp.width, height: vp.height },
            ...(proxy ? { proxy: { server: proxy } } : {}),
            locale: 'en-US',
            timezoneId: 'America/New_York',
        });
        const page = await ctx.newPage();
        await applyStealthScripts(page);
        await page.goto(url, {
            waitUntil: 'domcontentloaded',
            timeout: timeoutMs,
        });
        // Wait for CAPTCHA to render
        await page.waitForTimeout(2000);
        // Detect the target object from the CAPTCHA instructions
        const targetObject = await detectImageCaptchaTarget(page);
        if (!targetObject) {
            const html = await page.content().catch(() => _html);
            return { solved: false, html, error: 'Could not detect image CAPTCHA target object from page' };
        }
        log.debug(`Image CAPTCHA target: "${targetObject}"`);
        // Solve the CAPTCHA — may take up to VISION_TIMEOUT_MS * IMAGE_CAPTCHA_MAX_ROUNDS
        const captchaResult = await solveImageCaptcha(page, targetObject);
        const html = await page.content().catch(() => _html);
        const cookies = await ctx.cookies();
        const cookieStrings = cookies.map(c => `${c.name}=${c.value}; Path=${c.path || '/'}${c.domain ? `; Domain=${c.domain}` : ''}`);
        if (cookieStrings.length > 0) {
            cacheCookiesForUrl(url, cookieStrings);
        }
        if (captchaResult.solved) {
            console.log(`[challenge-solver] Image CAPTCHA solved for ${getDomain(url)} in ${captchaResult.rounds} round(s)`);
            return { solved: true, html, cookies: cookieStrings, method: 'local-browser' };
        }
        return {
            solved: false,
            html,
            error: captchaResult.error ?? `Image CAPTCHA not solved after ${captchaResult.rounds} round(s)`,
        };
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        log.debug('Image CAPTCHA solve failed:', error);
        return { solved: false, html: _html, error };
    }
    finally {
        // Release the context on every path; close errors are ignored (best effort).
        await ctx?.close().catch(() => { });
    }
}
// ── Cloudflare solver ─────────────────────────────────────────────────────────
/**
 * Solve Cloudflare JS challenge by rendering the page in a stealth browser.
 *
 * Cloudflare's "Just a moment..." challenge:
 * - Runs JavaScript fingerprinting in the browser
 * - If the fingerprint passes (looks like a real browser), auto-redirects to the real page
 * - No human interaction needed if the browser stealth is good enough
 *
 * Strategy:
 * 1. Open a fresh stealth browser page
 * 2. Navigate to the URL
 * 3. Wait for the challenge to complete (title changes OR challenge element disappears)
 * 4. Extract HTML and cookies
 * 5. Cache cookies (TTL capped at 30 minutes, shorter if cf_clearance expires sooner)
 *
 * The browser context is closed on every path, including errors. On timeout the
 * page HTML is read before the context is closed, so the caller receives the
 * rendered challenge page rather than the stale input HTML.
 *
 * @param url       Page URL to open.
 * @param _html     Original challenge HTML, used when the page content cannot be read.
 * @param timeoutMs Budget for navigation and for the challenge wait (milliseconds each).
 * @param proxy     Optional proxy server passed to the browser context.
 */
async function solveCloudflare(url, _html, timeoutMs, proxy) {
    let ctx = null;
    try {
        const { getStealthBrowser, getRandomUserAgent, getRandomViewport, applyStealthScripts } = await import('../core/browser-pool.js');
        const browser = await getStealthBrowser();
        const vp = getRandomViewport();
        ctx = await browser.newContext({
            userAgent: getRandomUserAgent(),
            viewport: { width: vp.width, height: vp.height },
            ...(proxy ? { proxy: { server: proxy } } : {}),
            locale: 'en-US',
            timezoneId: 'America/New_York',
        });
        const page = await ctx.newPage();
        await applyStealthScripts(page);
        // Navigate to the challenge URL
        await page.goto(url, {
            waitUntil: 'domcontentloaded',
            timeout: timeoutMs,
        });
        // Wait for Cloudflare challenge to resolve
        const solved = await waitForChallengeResolution(page, timeoutMs);
        if (!solved) {
            log.debug('Cloudflare challenge did not resolve within timeout');
            // Read the page while the context is still open
            const challengeHtml = await page.content().catch(() => _html);
            return { solved: false, html: challengeHtml, error: 'Cloudflare challenge timed out' };
        }
        // Extract real page content
        const realHtml = await page.content();
        // Extract cookies (especially cf_clearance)
        const cookies = await ctx.cookies();
        const cookieStrings = cookies.map(c => {
            let s = `${c.name}=${c.value}`;
            if (c.path)
                s += `; Path=${c.path}`;
            if (c.domain)
                s += `; Domain=${c.domain}`;
            if (c.secure)
                s += '; Secure';
            if (c.httpOnly)
                s += '; HttpOnly';
            if (c.expires && c.expires > 0) {
                s += `; Expires=${new Date(c.expires * 1000).toUTCString()}`;
            }
            return s;
        });
        // Determine TTL based on cf_clearance expiry (default 30 min)
        const cfClearance = cookies.find(c => c.name === 'cf_clearance');
        const ttlMs = cfClearance?.expires && cfClearance.expires > 0
            ? Math.min((cfClearance.expires * 1000) - Date.now(), 30 * 60 * 1000)
            : 30 * 60 * 1000;
        // Cache cookies for future requests
        if (cookieStrings.length > 0) {
            cacheCookiesForUrl(url, cookieStrings, ttlMs);
            log.debug(`Cached ${cookieStrings.length} cookies for ${getDomain(url)} (TTL: ${Math.round(ttlMs / 60000)}m)`);
        }
        console.log(`[challenge-solver] Cloudflare challenge solved for ${getDomain(url)}, extracted ${cookieStrings.length} cookies`);
        return {
            solved: true,
            html: realHtml,
            cookies: cookieStrings,
            method: 'local-browser',
        };
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        log.debug('Cloudflare solve failed:', error);
        return { solved: false, html: _html, error };
    }
    finally {
        // Close only our context. Don't close shared browser — it's managed by browser-pool
        await ctx?.close().catch(() => { });
    }
}
// ── Generic stealth browser solver ───────────────────────────────────────────
/**
 * General-purpose stealth browser solve for challenges that may auto-resolve
 * when rendered in a legitimate-looking browser (DataDome, Akamai, etc.).
 *
 * The page counts as still blocked when its title contains one of
 * CF_CHALLENGE_TITLES or its HTML contains 'cf-browser-verification' or
 * 'challenge-form'. Cookies are cached only when the page is not blocked.
 * The browser context is closed on every path, including errors.
 *
 * @param url           Page URL to open.
 * @param _html         Original challenge HTML, returned when the solve throws.
 * @param timeoutMs     Navigation timeout in milliseconds.
 * @param proxy         Optional proxy server passed to the browser context.
 * @param challengeType Challenge name, used in log and error messages only.
 */
async function solveWithStealthBrowser(url, _html, timeoutMs, proxy, challengeType) {
    let ctx = null;
    try {
        const { getStealthBrowser, getRandomUserAgent, getRandomViewport, applyStealthScripts } = await import('../core/browser-pool.js');
        const browser = await getStealthBrowser();
        const vp = getRandomViewport();
        ctx = await browser.newContext({
            userAgent: getRandomUserAgent(),
            viewport: { width: vp.width, height: vp.height },
            ...(proxy ? { proxy: { server: proxy } } : {}),
            locale: 'en-US',
        });
        const page = await ctx.newPage();
        await applyStealthScripts(page);
        await page.goto(url, {
            waitUntil: 'networkidle',
            timeout: timeoutMs,
        });
        // Wait a bit for any JS-based challenges to execute
        await page.waitForTimeout(2000);
        const html = await page.content();
        const cookies = await ctx.cookies();
        const cookieStrings = cookies.map(c => `${c.name}=${c.value}; Path=${c.path || '/'}${c.domain ? `; Domain=${c.domain}` : ''}`);
        // Check if we got real content (not a challenge page)
        const pageTitle = (await page.title().catch(() => '')).toLowerCase();
        const isStillChallenge = CF_CHALLENGE_TITLES.some(t => pageTitle.includes(t))
            || html.includes('cf-browser-verification')
            || html.includes('challenge-form');
        if (isStillChallenge) {
            return { solved: false, html, error: `${challengeType} challenge did not resolve` };
        }
        if (cookieStrings.length > 0) {
            cacheCookiesForUrl(url, cookieStrings);
        }
        console.log(`[challenge-solver] ${challengeType} challenge solved for ${getDomain(url)}`);
        return { solved: true, html, cookies: cookieStrings, method: 'local-browser' };
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        return { solved: false, html: _html, error };
    }
    finally {
        // Release the context on every path; close errors are ignored (best effort).
        await ctx?.close().catch(() => { });
    }
}
// ── Remote worker proxy ───────────────────────────────────────────────────────
/**
 * Proxy a solve request to a remote browser worker (e.g. Hetzner VPS).
 *
 * The worker endpoint is expected to accept:
 * POST /solve
 * { url, challengeType, timeout, proxy? }
 *
 * And return:
 * { solved: boolean, html: string, cookies?: string[], error?: string }
 *
 * Set BROWSER_WORKER_URL to the worker base URL (e.g. http://hetzner:3001)
 * to route all browser-based challenge solving to the worker. A trailing slash
 * on the base URL is tolerated.
 *
 * If the worker request fails for any reason (network error, abort, non-2xx,
 * unusable body), the challenge is retried locally with the same solver that
 * solveChallenge would pick for 'cloudflare' and 'captcha'; every other type
 * goes to the generic stealth-browser solver.
 *
 * @param url           Page URL.
 * @param challengeType Challenge kind.
 * @param html          Original challenge HTML, handed to the local fallback.
 * @param options       `{ workerUrl, timeout, proxy? }`.
 */
async function solveViaRemoteWorker(url, challengeType, html, options) {
    const { workerUrl, timeout, proxy } = options;
    try {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), timeout + 5000); // Add buffer
        let result;
        try {
            const endpoint = `${String(workerUrl).replace(/\/+$/, '')}/solve`;
            const response = await fetch(endpoint, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ url, challengeType, timeout, ...(proxy ? { proxy } : {}) }),
                signal: controller.signal,
            });
            if (!response.ok) {
                throw new Error(`Worker returned HTTP ${response.status}`);
            }
            result = await response.json();
        }
        finally {
            // Always disarm the abort timer, including when fetch itself fails,
            // so it cannot keep the event loop alive after we have moved on.
            clearTimeout(timer);
        }
        // Cache cookies from remote solve
        if (result.solved && result.cookies?.length) {
            cacheCookiesForUrl(url, result.cookies);
            console.log(`[challenge-solver] Remote ${challengeType} solve for ${getDomain(url)}, cached ${result.cookies.length} cookies`);
        }
        return { ...result, method: 'remote-worker' };
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        log.debug('Remote worker solve failed:', error);
        // Fall through to local solve on worker failure
        console.log(`[challenge-solver] Remote worker failed, attempting local ${challengeType} solve for ${getDomain(url)}`);
        switch (challengeType) {
            case 'cloudflare':
                return solveCloudflare(url, html, timeout, proxy);
            case 'captcha':
                // Same routing as solveChallenge: image CAPTCHAs need the vision solver
                return solveCaptchaWithVision(url, html, timeout, proxy);
            default:
                return solveWithStealthBrowser(url, html, timeout, proxy, challengeType);
        }
    }
}
// ── Challenge resolution detection ───────────────────────────────────────────
/**
 * Wait for a Cloudflare challenge page to resolve.
 *
 * Cloudflare's challenge works like this:
 * 1. Initial page: title is "Just a moment..." with challenge elements
 * 2. Browser runs JS fingerprinting
 * 3. On pass: redirects to real page (title and content change)
 * 4. On fail: stays on challenge page
 *
 * We detect resolution by polling every 500 ms for:
 * - Title change (away from challenge titles)
 * - Challenge element disappearance
 * - A non-challenge title once the network has gone idle
 *
 * @param page      Playwright-style page (title, $, waitForTimeout, waitForLoadState).
 * @param timeoutMs Overall polling budget in milliseconds.
 * @returns {Promise<boolean>} true when the page is (or becomes) a non-challenge page,
 *   false when the budget runs out first.
 */
async function waitForChallengeResolution(page, timeoutMs) {
    const start = Date.now();
    const pollInterval = 500;
    const hasChallengeTitle = (title) => {
        const lower = title.toLowerCase();
        return CF_CHALLENGE_TITLES.some(t => lower.includes(t));
    };
    // Quick check: is it even a challenge page?
    const initialTitle = await page.title().catch(() => '');
    if (!hasChallengeTitle(initialTitle)) {
        // Not a challenge page to begin with — treat as solved
        return true;
    }
    // Poll until timeout
    while (Date.now() - start < timeoutMs) {
        await page.waitForTimeout(pollInterval);
        const title = await page.title().catch(() => '');
        // Title changed away from challenge
        const isChallengeTitle = hasChallengeTitle(title);
        if (!isChallengeTitle && title.length > 0) {
            // Give the page a moment to fully render
            await page.waitForTimeout(1000);
            return true;
        }
        // Check if challenge elements are gone
        let challengeElementGone = true;
        for (const selector of CF_CHALLENGE_SELECTORS) {
            try {
                const el = await page.$(selector);
                if (el) {
                    challengeElementGone = false;
                    break;
                }
            }
            catch {
                // Selector check failed — continue
            }
        }
        if (challengeElementGone && !isChallengeTitle) {
            await page.waitForTimeout(500);
            return true;
        }
        // Budget exhausted: stop here. A timeout of 0 passed to waitForLoadState
        // would mean "wait without limit", so never pass a non-positive value.
        const remainingMs = timeoutMs - (Date.now() - start);
        if (remainingMs <= 0) {
            break;
        }
        // Try waiting for network to settle (challenge often triggers fetches)
        try {
            await page.waitForLoadState('networkidle', { timeout: Math.min(3000, remainingMs) });
            const finalTitle = await page.title().catch(() => '');
            if (!hasChallengeTitle(finalTitle)) {
                return true;
            }
        }
        catch {
            // Timeout or error — continue polling
        }
    }
    return false;
}
694
+ // ── hCaptcha Accessibility Bypass ────────────────────────────────────────────
695
+ // TODO: hCaptcha Accessibility Bypass
696
+ // hCaptcha has an accessibility service at https://www.hcaptcha.com/accessibility
697
+ // that provides a cookie allowing users with accessibility needs to bypass hCaptcha.
698
+ //
699
+ // Implementation notes:
700
+ // - The service used to allow programmatic registration without email verification
701
+ // - As of 2025, it requires manual verification (email link) to activate
702
+ // - Since this requires human interaction, it cannot be fully automated
703
+ //
704
+ // When/if implemented:
705
+ // 1. Check https://www.hcaptcha.com/accessibility for current API status
706
+ // 2. Register with a request to their accessibility API
707
+ // 3. If they return a cookie directly (no email verification), cache it
708
+ // 4. Attach the cookie to requests to sites using hCaptcha
709
+ //
710
+ // const HCAPTCHA_ACCESSIBILITY_URL = 'https://accounts.hcaptcha.com/demo?sitekey=bf5558a0-...';
711
+ // export async function getHCaptchaAccessibilityCookie(): Promise<string | null> { ... }
// ── Utility ───────────────────────────────────────────────────────────────────
/**
 * Hostname of `url`, for log messages.
 * Input that cannot be parsed as a URL is returned unchanged.
 */
function getDomain(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return url;
    }
    return parsed.hostname;
}