@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,1600 @@
1
+ /**
2
+ * Browser-based fetching — uses Playwright via the browser pool.
3
+ * Handles browserFetch, browserScreenshot, retryFetch, and scrollAndWait.
4
+ */
5
+ import { TimeoutError, BlockedError, NetworkError, WebPeelError } from '../types.js';
6
+ import { detectChallenge } from './challenge-detection.js';
7
+ import { getRealisticUserAgent } from './user-agents.js';
8
+ import { getRandomUserAgent, applyStealthScripts, takePooledPage, ensurePagePool, recyclePooledPage, getBrowser, getStealthBrowser, getStealthPlaywright, getProfileBrowser, PAGE_POOL_SIZE, MAX_CONCURRENT_PAGES, getPooledPagesCount, ANTI_DETECTION_ARGS, getRandomViewport, } from './browser-pool.js';
9
// Proprietary stealth module — gitignored, loaded conditionally.
// Both bindings stay `undefined` when the module cannot be loaded, so every
// call site must use an optional call (e.g. `applyStealthPatches?.(page)`).
let applyStealthPatches;
let applyAcceptLanguageHeader;
try {
    // @ts-ignore — proprietary module, gitignored
    const mod = await import('./stealth-patches.js');
    applyStealthPatches = mod.applyStealthPatches;
    applyAcceptLanguageHeader = mod.applyAcceptLanguageHeader;
}
catch (err) {
    // A missing module is expected in CI/open-source builds and stays silent.
    // Any other failure means the module is present but broken; report it so
    // stealth mode does not degrade without anyone noticing. Loading remains
    // best-effort: the error is logged, never re-thrown.
    if (err?.code !== 'ERR_MODULE_NOT_FOUND') {
        createLogger('browser').warn(`Failed to load stealth-patches module: ${err instanceof Error ? err.message : String(err)}`);
    }
}
19
+ import { validateUrl, validateUserAgent, createAbortError } from './http-fetch.js';
20
+ import { autoInteract } from './auto-interact.js';
21
+ import { createLogger } from './logger.js';
22
+ const log = createLogger('browser');
23
+ // ── Execution context error detection ─────────────────────────────────────────
24
/**
 * Check if an error indicates the page execution context was destroyed.
 * This happens on SPAs (like Polymarket) when scrolling triggers navigation.
 *
 * Matching is done on the error message, case-insensitively, because the
 * browser driver reports these conditions only as message text.
 *
 * @param {unknown} err - Caught value; non-Error values are stringified.
 * @returns {boolean} True when the message contains one of the known
 *   "context/target/frame/session is gone" markers.
 */
function isContextDestroyedError(err) {
    const message = (err instanceof Error ? err.message : String(err)).toLowerCase();
    const markers = [
        'execution context was destroyed',
        'target closed',
        // NOTE(review): newer Playwright releases appear to word the closed-target
        // error this way — confirm against the pinned Playwright version.
        'target page, context or browser has been closed',
        'frame was detached',
        'session closed',
    ];
    return markers.some((marker) => message.includes(marker));
}
35
/**
 * Resilient scrollThrough helper — scrolls from top to bottom to trigger
 * IntersectionObservers, lazy loading, and animations.
 *
 * Handles SPAs gracefully: if the execution context is destroyed during scroll
 * (e.g., Polymarket navigation), logs a warning and stops — does NOT throw.
 *
 * Also tries inner scrollable containers (virtual scroll) before falling back
 * to window.scrollTo. After reaching the bottom it scrolls back to the top.
 *
 * @param {object} page - Page object exposing `evaluate` and `waitForTimeout`.
 * @param {number} [delayMs=250] - Pause between scroll steps; the pauses after
 *   the final bottom and top jumps are 1.6x and 2.4x this value.
 * @returns {Promise<void>}
 * @throws Re-throws any window-scroll error that is not a context-destroyed error.
 */
async function resilientScrollThrough(page, delayMs = 250) {
    // Each step covers 75% of the viewport so consecutive positions overlap.
    // A zero or non-numeric viewport height would give a step of 0/NaN and the
    // loops below would never advance, so fall back to 540px (75% of 720).
    const stepFor = (viewportHeight) => {
        const step = Math.round(viewportHeight * 0.75);
        return step > 0 ? step : 540;
    };
    try {
        // Use shared container detection from actions module
        const { detectScrollContainer } = await import('./actions.js');
        const containerSelector = (await detectScrollContainer(page)) || null;
        if (containerSelector) {
            // Scroll inner container
            try {
                const scrollHeight = await page.evaluate((sel) => {
                    const el = document.querySelector(sel);
                    return el ? el.scrollHeight : document.body.scrollHeight;
                }, containerSelector);
                const vh = await page.evaluate(() => window.innerHeight);
                const step = stepFor(vh);
                for (let y = 0; y < scrollHeight; y += step) {
                    await page.evaluate(([sel, sy]) => {
                        const el = document.querySelector(sel);
                        if (el)
                            el.scrollTop = sy;
                    }, [containerSelector, y]);
                    await page.waitForTimeout(delayMs);
                }
                // Jump to the very bottom, then back to the top.
                await page.evaluate((sel) => {
                    const el = document.querySelector(sel);
                    if (el)
                        el.scrollTop = el.scrollHeight;
                }, containerSelector);
                await page.waitForTimeout(Math.round(delayMs * 1.6));
                await page.evaluate((sel) => {
                    const el = document.querySelector(sel);
                    if (el)
                        el.scrollTop = 0;
                }, containerSelector);
                await page.waitForTimeout(Math.round(delayMs * 2.4));
                return;
            }
            catch (innerErr) {
                if (isContextDestroyedError(innerErr)) {
                    log.warn('Execution context destroyed during inner container scroll — continuing with captured content');
                    return;
                }
                // Fall through to window scroll
            }
        }
        // Window-level scroll (standard path)
        const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
        const viewportHeight = await page.evaluate(() => window.innerHeight);
        const step = stepFor(viewportHeight);
        for (let y = 0; y < scrollHeight; y += step) {
            await page.evaluate((sy) => window.scrollTo({ top: sy, behavior: 'instant' }), y);
            await page.waitForTimeout(delayMs);
        }
        await page.evaluate(() => window.scrollTo({ top: document.body.scrollHeight, behavior: 'instant' }));
        await page.waitForTimeout(Math.round(delayMs * 1.6));
        await page.evaluate(() => window.scrollTo({ top: 0, behavior: 'instant' }));
        await page.waitForTimeout(Math.round(delayMs * 2.4));
    }
    catch (err) {
        if (isContextDestroyedError(err)) {
            log.warn('Execution context destroyed during scrollThrough — continuing with captured content');
            return;
        }
        // Re-throw unexpected errors
        throw err;
    }
}
109
// ── Concurrency state (owned by this module) ─────────────────────────────────
// Number of browser pages currently checked out. browserFetch and
// browserScreenshot both wait while it is at MAX_CONCURRENT_PAGES and
// increment it once a slot is free.
let activePagesCount = 0;
// ── browserFetch ──────────────────────────────────────────────────────────────
/**
 * Fetch using headless Chromium via Playwright
 * Slower but can handle JavaScript-heavy sites and bypass some bot detection
 *
 * Flow: validate inputs → wait for a free page slot → obtain a page (pooled,
 * fresh, storage-state context, or dedicated proxy browser) → configure
 * headers/cookies/resource blocking → navigate and wait for content to settle →
 * optional auto-interaction and user actions → return HTML (or the raw body for
 * PDF/DOCX responses) plus optional screenshot.
 *
 * @param {string} url - Target URL; checked with validateUrl before anything else.
 * @param {object} [options]
 * @param {string} [options.userAgent] - Custom user agent (validated); disables the page pool.
 * @param {number} [options.waitMs=0] - Extra wait after load, 0–60000 ms.
 * @param {number} [options.timeoutMs=30000] - Navigation timeout and overall operation timeout.
 * @param {boolean} [options.screenshot=false] - Capture a PNG screenshot.
 * @param {boolean} [options.screenshotFullPage=false] - Capture the full page instead of the viewport.
 * @param {Object<string,string>} [options.headers] - Extra HTTP headers; `Host` is rejected, values max 500 chars.
 * @param {string[]} [options.cookies] - Cookie strings of the form `name=value[; attrs]`; attributes are ignored.
 * @param {boolean} [options.stealth=false] - Use the stealth browser and stealth patches.
 * @param {Array} [options.actions] - Page actions passed to executeActions.
 * @param {boolean} [options.keepPageOpen=false] - On success, return `page` and `browser` and leave them open.
 * @param {AbortSignal} [options.signal] - Aborting closes the page and rejects with an abort error.
 * @param {string} [options.profileDir] - Use the persistent profile browser for this directory.
 * @param {boolean} [options.headed=false] - Passed to the profile browser.
 * @param {*} [options.storageState] - Storage state injected into an isolated context.
 * @param {string} [options.proxy] - Proxy URL; launches a dedicated browser that is closed afterwards.
 * @param {string} [options.device='desktop'] - 'desktop' | 'mobile' | 'tablet'; unknown values fall back to desktop.
 * @param {number} [options.viewportWidth] - Overrides the device profile width.
 * @param {number} [options.viewportHeight] - Overrides the device profile height.
 * @param {number} [options.deviceScaleFactor] - Overrides the device profile scale factor.
 * @param {string} [options.waitUntil] - Navigation wait condition; defaults to 'domcontentloaded'.
 * @param {string} [options.waitSelector] - CSS selector to wait for after navigation (best-effort).
 * @param {string[]} [options.blockResources] - Resource types to abort; replaces the default blocklist.
 * @param {boolean} [options.isSPA=false] - Use the longer DOM-stability wait.
 * @param {string[]} [options.languages] - Preferred languages; the first one is used as the context locale.
 * @returns {Promise<object>} `{ html, buffer, url, statusCode, contentType, screenshot }`, plus
 *   `autoInteract` when auto-interaction produced a result, plus `page` and `browser` when
 *   `keepPageOpen` is set.
 * @throws {WebPeelError} Invalid wait time, header, or cookie; response larger than 10MB.
 * @throws {TimeoutError} Queue wait, navigation, or overall operation timed out.
 * @throws {BlockedError} Anti-bot network errors, near-empty responses, or detected challenge pages.
 * @throws {NetworkError} Other navigation/network failures.
 */
export async function browserFetch(url, options = {}) {
    // SECURITY: Validate URL to prevent SSRF
    validateUrl(url);
    const { userAgent, waitMs = 0, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, stealth = false, actions, keepPageOpen = false, signal, profileDir, headed = false, storageState, proxy, device = 'desktop', viewportWidth: optViewportWidth, viewportHeight: optViewportHeight, deviceScaleFactor: optDeviceScaleFactor, waitUntil: optWaitUntil, waitSelector, blockResources, isSPA = false, languages, } = options;
    // Device emulation profiles (with deviceScaleFactor for crisp screenshots)
    const deviceProfiles = {
        desktop: { width: 1920, height: 1080, deviceScaleFactor: 1, userAgent: undefined },
        mobile: {
            width: 390,
            height: 844,
            deviceScaleFactor: 3,
            userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
        },
        tablet: {
            width: 820,
            height: 1180,
            deviceScaleFactor: 2,
            userAgent: 'Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
        },
    };
    const deviceProfile = deviceProfiles[device] ?? deviceProfiles.desktop;
    const effectiveViewportWidth = optViewportWidth ?? deviceProfile.width;
    const effectiveViewportHeight = optViewportHeight ?? deviceProfile.height;
    const effectiveScaleFactor = optDeviceScaleFactor ?? deviceProfile.deviceScaleFactor;
    const effectiveWaitUntil = optWaitUntil || 'domcontentloaded';
    // Validate user agent if provided
    // In stealth mode with no custom UA, always use a realistic Chrome UA
    const validatedUserAgent = userAgent
        ? validateUserAgent(userAgent)
        : (stealth ? getRealisticUserAgent() : getRandomUserAgent());
    // Validate wait time
    if (waitMs < 0 || waitMs > 60000) {
        throw new WebPeelError('Wait time must be between 0 and 60000ms');
    }
    if (signal?.aborted) {
        throw createAbortError();
    }
    // SECURITY: Validate custom headers if provided
    if (headers) {
        for (const [key, value] of Object.entries(headers)) {
            // Block Host header override
            if (key.toLowerCase() === 'host') {
                throw new WebPeelError('Custom Host header is not allowed');
            }
            if (typeof value !== 'string' || value.length > 500) {
                throw new WebPeelError('Invalid header value');
            }
        }
    }
    // SECURITY: Limit concurrent browser pages with timeout
    const queueStartTime = Date.now();
    const QUEUE_TIMEOUT_MS = 30000; // 30 second max wait
    while (activePagesCount >= MAX_CONCURRENT_PAGES) {
        // Stop queueing as soon as the caller gives up (no slot is held yet).
        if (signal?.aborted) {
            throw createAbortError();
        }
        if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
            throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
        }
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    activePagesCount++;
    let page = null;
    let usingPooledPage = false;
    let abortHandler;
    // Set only when the page (and browser) are actually returned to the caller.
    // Cleanup keys off this instead of `keepPageOpen`, so a failed fetch still
    // closes everything even when the caller asked to keep the page open.
    let handedOffToCaller = false;
    // Declared here (outside try) so the finally block can reference it
    const usingProfileBrowser = !!profileDir;
    // Owned context created when storageState injection is requested
    let ownedContext;
    // Owned browser launched when proxy is specified (dedicated browser with proxy at launch level)
    let ownedBrowser;
    try {
        const browser = usingProfileBrowser
            ? await getProfileBrowser(profileDir, headed, stealth)
            : stealth
                ? await getStealthBrowser()
                : await getBrowser();
        // Only use the shared page pool for non-stealth, non-profile, non-keepOpen, non-storageState, non-proxy fetches
        const shouldUsePagePool = !stealth && !userAgent && !keepPageOpen && !usingProfileBrowser && !storageState && !proxy;
        if (shouldUsePagePool) {
            page = takePooledPage();
            usingPooledPage = !!page;
            if (usingPooledPage && getPooledPagesCount() < PAGE_POOL_SIZE) {
                // Refill the pool in the background; failures are non-fatal.
                void ensurePagePool(browser).catch(() => { });
            }
        }
        if (!page) {
            const effectiveLocale = (languages && languages.length > 0) ? languages[0] : 'en-US';
            const pageOptions = {
                userAgent: validatedUserAgent,
                // viewport: null lets the browser use its natural window size (set via --window-size),
                // avoiding the telltale Playwright default of 1280×720.
                viewport: null,
                ...(stealth
                    ? {
                        locale: effectiveLocale,
                        timezoneId: 'America/New_York',
                        javaScriptEnabled: true,
                    }
                    : {}),
            };
            if (proxy) {
                // Parse proxy URL to extract auth credentials for Playwright
                let playwrightProxy;
                try {
                    const proxyUrl = new URL(proxy);
                    playwrightProxy = {
                        server: `${proxyUrl.protocol}//${proxyUrl.host}`,
                        username: proxyUrl.username || undefined,
                        password: proxyUrl.password || undefined,
                    };
                }
                catch (e) {
                    // Fallback: use proxy string as-is
                    log.debug('proxy URL parse failed, using as-is:', e instanceof Error ? e.message : e);
                    playwrightProxy = { server: proxy };
                }
                // Launch a DEDICATED fresh browser with proxy at the launch level.
                // Context-level proxy is unreliable for anti-bot sites — they check the browser's
                // IP at connection time (set at launch), not at context creation.
                const pw = stealth ? await getStealthPlaywright() : (await import('playwright')).chromium;
                const vp = getRandomViewport();
                ownedBrowser = await pw.launch({
                    headless: true,
                    args: [...ANTI_DETECTION_ARGS, `--window-size=${vp.width},${vp.height}`],
                    proxy: playwrightProxy,
                });
                ownedContext = await ownedBrowser.newContext({
                    userAgent: validatedUserAgent || getRandomUserAgent(),
                    locale: effectiveLocale,
                    timezoneId: 'America/New_York',
                    javaScriptEnabled: true,
                    viewport: { width: effectiveViewportWidth || vp.width, height: effectiveViewportHeight || vp.height },
                    deviceScaleFactor: effectiveScaleFactor,
                    ...(storageState ? { storageState } : {}),
                });
                page = await ownedContext.newPage();
            }
            else if (storageState) {
                // Create an isolated context with the injected storage state (cookies + localStorage)
                ownedContext = await browser.newContext({
                    ...pageOptions,
                    storageState,
                    viewport: { width: effectiveViewportWidth, height: effectiveViewportHeight },
                    deviceScaleFactor: effectiveScaleFactor,
                });
                page = await ownedContext.newPage();
            }
            else {
                // When deviceScaleFactor differs from default (1), create an isolated context
                // so the scale factor is applied (Playwright requires it at context level)
                if (effectiveScaleFactor !== 1) {
                    ownedContext = await browser.newContext({
                        ...pageOptions,
                        viewport: { width: effectiveViewportWidth, height: effectiveViewportHeight },
                        deviceScaleFactor: effectiveScaleFactor,
                    });
                    page = await ownedContext.newPage();
                }
                else {
                    page = await browser.newPage(pageOptions);
                    // Apply viewport for device emulation or explicit viewport overrides
                    if (device !== 'desktop' || optViewportWidth !== undefined || optViewportHeight !== undefined) {
                        await page.setViewportSize({ width: effectiveViewportWidth, height: effectiveViewportHeight }).catch(() => { });
                    }
                }
            }
            await applyStealthScripts(page, languages);
            // Apply supplemental stealth patches (canvas noise, connection API, battery, etc.)
            // These go beyond what puppeteer-extra-plugin-stealth provides.
            if (stealth) {
                await applyStealthPatches?.(page);
                // NOTE(review): the header locale is fixed to 'en-US' even when `languages`
                // selects a different context locale — confirm whether that is intended.
                await applyAcceptLanguageHeader?.(page, 'en-US');
            }
            usingPooledPage = false;
        }
        else {
            await page.setViewportSize({ width: effectiveViewportWidth, height: effectiveViewportHeight }).catch(() => { });
        }
        if (signal) {
            // Closing the page makes any in-flight Playwright call reject promptly.
            abortHandler = () => {
                if (page && !page.isClosed()) {
                    void page.close().catch(() => { });
                }
            };
            signal.addEventListener('abort', abortHandler, { once: true });
        }
        // Drop any route handlers left on the page before installing our own.
        await page.unroute('**/*').catch(() => { });
        const mergedHeaders = { ...(headers || {}) };
        if (usingPooledPage) {
            mergedHeaders['User-Agent'] = validatedUserAgent;
        }
        // Apply device user-agent (mobile/tablet) unless caller overrode userAgent
        if (deviceProfile.userAgent && !userAgent) {
            mergedHeaders['User-Agent'] = deviceProfile.userAgent;
        }
        if (usingPooledPage || Object.keys(mergedHeaders).length > 0) {
            await page.setExtraHTTPHeaders(mergedHeaders);
        }
        // Set cookies if provided
        if (cookies && cookies.length > 0) {
            const parsedCookies = cookies.map((cookie) => {
                // Only the leading `name=value` pair is used; attributes after ';' are ignored.
                const [nameValue] = cookie.split(';');
                // Split on the FIRST '=' only: cookie values (e.g. base64) may contain '='.
                const separatorIndex = nameValue.indexOf('=');
                const name = separatorIndex === -1 ? '' : nameValue.slice(0, separatorIndex).trim();
                if (!name) {
                    throw new WebPeelError(`Invalid cookie format: ${cookie}`);
                }
                return {
                    name,
                    value: nameValue.slice(separatorIndex + 1).trim(),
                    url,
                };
            });
            await page.context().addCookies(parsedCookies);
        }
        if (signal?.aborted) {
            throw createAbortError();
        }
        // Block resources: custom list takes precedence; otherwise use defaults in non-screenshot/non-stealth mode.
        // In stealth mode, blocking common resources can be a bot-detection signal.
        if (blockResources && blockResources.length > 0) {
            const blockedTypes = new Set(blockResources);
            await page.route('**/*', (route) => {
                const resourceType = route.request().resourceType();
                if (blockedTypes.has(resourceType)) {
                    route.abort();
                }
                else {
                    route.continue();
                }
            });
        }
        else if (screenshot) {
            // Screenshots need all resources (images, CSS) for visual accuracy
            await page.route('**/*', (route) => route.continue());
        }
        else {
            // Default: block images/fonts/media for speed + bandwidth savings.
            // In stealth mode, we keep stylesheets (blocking CSS is a bot signal)
            // but still block images/fonts/media (saves ~70% bandwidth per page).
            const blocklist = stealth
                ? ['image', 'font', 'media'] // stealth: keep CSS, block heavy assets
                : ['image', 'font', 'media', 'stylesheet']; // normal: block everything non-text
            await page.route('**/*', (route) => {
                const resourceType = route.request().resourceType();
                if (blocklist.includes(resourceType)) {
                    route.abort();
                }
                else {
                    route.continue();
                }
            });
        }
        // SECURITY: Wrap entire operation in timeout
        let screenshotBuffer;
        const throwIfAborted = () => {
            if (signal?.aborted) {
                throw createAbortError();
            }
        };
        const fetchPromise = (async () => {
            let response;
            try {
                response = await page.goto(url, {
                    waitUntil: effectiveWaitUntil,
                    timeout: timeoutMs,
                });
            }
            catch (gotoError) {
                // Map low-level navigation errors to the project's error types.
                // Order matters: specific net::ERR_* codes first, generic ones last.
                const msg = gotoError?.message || String(gotoError);
                if (/net::ERR_HTTP2_PROTOCOL_ERROR/i.test(msg)) {
                    throw new BlockedError(`Site blocked the request (HTTP/2 protocol error). The site likely has anti-bot protection. Try using stealth mode or a proxy.`);
                }
                if (/net::ERR_CONNECTION_REFUSED/i.test(msg)) {
                    throw new NetworkError(`Connection refused by the server at ${url}. The server may be down or blocking your IP.`);
                }
                if (/net::ERR_CONNECTION_RESET/i.test(msg)) {
                    throw new BlockedError(`Connection was reset by the server. This typically indicates anti-bot protection or IP blocking. Try using stealth mode or a different IP.`);
                }
                if (/net::ERR_SSL/i.test(msg)) {
                    throw new NetworkError(`SSL/TLS error connecting to site. URL: ${url}`);
                }
                if (/net::ERR_NAME_NOT_RESOLVED/i.test(msg)) {
                    throw new NetworkError(`Domain not found: ${url}`);
                }
                if (/net::ERR_CERT/i.test(msg)) {
                    throw new NetworkError(`SSL certificate error for ${url}`);
                }
                if (/NS_ERROR_NET_RESET/i.test(msg)) {
                    throw new NetworkError(`Connection reset (Firefox). The site may be blocking automated access. URL: ${url}`);
                }
                if (/timeout/i.test(msg)) {
                    throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
                }
                if (/net::ERR_/i.test(msg)) {
                    throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
                }
                throw gotoError;
            }
            throwIfAborted();
            // Wait for a specific CSS selector if requested
            if (waitSelector) {
                await page.waitForSelector(waitSelector, { timeout: timeoutMs }).catch(() => {
                    log.debug(`waitSelector "${waitSelector}" not found within timeout`);
                });
                throwIfAborted();
            }
            // Quick check: if body text is very thin, wait for JS to render more content.
            // Only adds latency when the page clearly hasn't loaded yet.
            // eslint-disable-next-line @typescript-eslint/no-implied-eval
            const bodyTextLength = await page.evaluate('document.body?.innerText?.trim().length || 0').catch(() => 0);
            if (bodyTextLength < 500) {
                await page.waitForLoadState('networkidle', { timeout: 1500 }).catch(() => { });
                throwIfAborted();
            }
            // DOM stability check: wait for SPA hydration to settle.
            // Polls innerText length every 500ms — if still growing, keep waiting.
            // SPAs (Kayak, Google Flights, Expedia) get a longer timeout to allow async data loads.
            {
                const stabilityStart = Date.now();
                // SPA sites (Kayak, Google Flights, Expedia) need up to 12s for results to load.
                // Normal rendered pages need just 3s extra.
                const MAX_STABILITY_WAIT_MS = isSPA ? 12000 : 3000;
                // SPA: must be stable for 2s (4 consecutive 500ms checks). Normal: 1s (2 checks).
                const STABLE_CHECKS_REQUIRED = isSPA ? 4 : 2;
                const POLL_INTERVAL_MS = 500;
                const MIN_CONTENT_LENGTH = 200; // Don't consider near-empty pages stable
                let prevLength = await page.evaluate('document.body?.innerText?.length || 0').catch(() => 0);
                let stableCount = 0;
                while (Date.now() - stabilityStart < MAX_STABILITY_WAIT_MS) {
                    throwIfAborted();
                    await page.waitForTimeout(POLL_INTERVAL_MS);
                    const curLength = await page.evaluate('document.body?.innerText?.length || 0').catch(() => 0);
                    if (curLength !== prevLength) {
                        // Content changed — reset stability counter
                        stableCount = 0;
                    }
                    else if (curLength >= MIN_CONTENT_LENGTH) {
                        stableCount++;
                        if (stableCount >= STABLE_CHECKS_REQUIRED)
                            break; // stable long enough
                    }
                    prevLength = curLength;
                }
                if (isSPA) {
                    log.debug(`SPA stability check: ${Date.now() - stabilityStart}ms, length=${prevLength}`);
                }
            }
            const finalUrl = page.url();
            const contentType = response?.headers()?.['content-type'] || '';
            const contentTypeLower = contentType.toLowerCase();
            const urlLower = finalUrl.toLowerCase();
            const isPdf = contentTypeLower.includes('application/pdf') || urlLower.endsWith('.pdf');
            const isDocx = contentTypeLower.includes('wordprocessingml.document') || urlLower.endsWith('.docx');
            // Requires a response object because the raw body is read from it below.
            const isBinaryDoc = !!response && (isPdf || isDocx);
            // Small randomized delay in stealth mode (simulate human behavior)
            // Keep it short — enough to look human, not enough to kill latency
            if (stealth) {
                const extraDelayMs = 200 + Math.floor(Math.random() * 601);
                await page.waitForTimeout(extraDelayMs);
                throwIfAborted();
            }
            // Human-like delay for proxied requests (helps bypass bot detection on strict sites)
            if (proxy) {
                // Realistic human behavior to bypass behavioral analysis
                const humanDelay = 800 + Math.random() * 1200;
                await page.waitForTimeout(humanDelay);
                throwIfAborted();
                // Realistic mouse movement (simulate human cursor)
                try {
                    const vw = await page.evaluate(() => window.innerWidth);
                    const vh = await page.evaluate(() => window.innerHeight);
                    await page.mouse.move(100 + Math.random() * (vw - 200), 100 + Math.random() * (vh - 200), { steps: 5 + Math.floor(Math.random() * 10) });
                    // Small scroll to trigger lazy-loaded content
                    await page.evaluate(() => window.scrollBy(0, 200 + Math.random() * 400));
                    await page.waitForTimeout(300 + Math.random() * 500);
                    throwIfAborted();
                    // Second mouse move
                    await page.mouse.move(50 + Math.random() * (vw - 100), 50 + Math.random() * (vh - 100), { steps: 3 + Math.floor(Math.random() * 5) });
                }
                catch {
                    // Non-fatal: mouse/scroll simulation failed
                    // (this also swallows an abort raised inside the block; the
                    // check right after the try/catch re-raises it).
                }
                throwIfAborted();
            }
            // Wait for additional time if requested (for dynamic content / screenshots)
            if (waitMs > 0) {
                await page.waitForTimeout(waitMs);
                throwIfAborted();
            }
            // Auto-interact: dismiss cookie banners, consent popups, overlays
            // before content extraction. Runs before user-specified actions so
            // that popups don't interfere with custom interactions.
            let autoInteractResult;
            if (!isBinaryDoc) {
                try {
                    // Cap auto-interaction at 3.5s; on timeout report "nothing done".
                    autoInteractResult = await Promise.race([
                        autoInteract(page),
                        new Promise((resolve) => setTimeout(() => resolve({ cookieBannerDismissed: false, consentHandled: false, loadMoreClicked: 0, overlaysDismissed: 0 }), 3500)),
                    ]);
                }
                catch {
                    // Never block extraction
                }
                throwIfAborted();
            }
            // Execute page actions if provided
            if (actions && actions.length > 0) {
                const { executeActions } = await import('./actions.js');
                const actionScreenshot = await executeActions(page, actions);
                if (actionScreenshot) {
                    screenshotBuffer = actionScreenshot;
                }
                throwIfAborted();
            }
            // If the navigation returned a binary document (PDF/DOCX), grab the raw body.
            if (isBinaryDoc) {
                const buffer = await response.body();
                throwIfAborted();
                // Capture screenshot if requested (and not already captured by actions)
                if (screenshot && !screenshotBuffer) {
                    screenshotBuffer = await page.screenshot({
                        fullPage: screenshotFullPage,
                        type: 'png',
                    });
                }
                return {
                    html: '',
                    finalUrl,
                    buffer,
                    contentType,
                    statusCode: response.status(),
                };
            }
            const html = await page.content();
            throwIfAborted();
            return {
                html,
                finalUrl,
                contentType,
                statusCode: response?.status(),
                autoInteractResult,
            };
        })();
        let operationTimeout;
        const timeoutPromise = new Promise((_, reject) => {
            operationTimeout = setTimeout(() => reject(new TimeoutError(`Operation timed out after ${timeoutMs}ms`)), timeoutMs);
        });
        let fetchData;
        try {
            fetchData = await Promise.race([fetchPromise, timeoutPromise]);
        }
        finally {
            // Clear the timer on failure too; otherwise it stays pending for up to
            // `timeoutMs` after the fetch has already rejected.
            clearTimeout(operationTimeout);
        }
        const { html, finalUrl } = fetchData;
        const fetchBuffer = 'buffer' in fetchData ? fetchData.buffer : undefined;
        const fetchContentType = 'contentType' in fetchData ? fetchData.contentType : undefined;
        const fetchStatusCode = 'statusCode' in fetchData ? fetchData.statusCode : undefined;
        const fetchAutoInteract = 'autoInteractResult' in fetchData ? fetchData.autoInteractResult : undefined;
        const isBinaryDoc = !!fetchBuffer;
        // SECURITY: Limit HTML size (skip for binary documents where html is empty)
        if (!isBinaryDoc) {
            if (html.length > 10 * 1024 * 1024) { // 10MB limit
                throw new WebPeelError('Response too large (max 10MB)');
            }
            if (!html || html.length < 100) {
                throw new BlockedError('Empty or suspiciously small response from browser.');
            }
            // Run challenge detection on browser-fetched HTML (covers both regular and stealth modes)
            // Note: skip empty-shell type — that's a rendering quality issue (SPA needs more JS time),
            // not a bot challenge. The caller's escalation logic handles empty-shell separately.
            const browserChallengeResult = detectChallenge(html, fetchStatusCode);
            if (browserChallengeResult.isChallenge && browserChallengeResult.type !== 'empty-shell') {
                throw new BlockedError(`Challenge page detected (${browserChallengeResult.type || 'unknown'}, confidence: ${browserChallengeResult.confidence.toFixed(2)}). ` +
                    `Site requires human verification. Try a different approach or use a CAPTCHA solving service.`);
            }
        }
        // Capture screenshot if requested (and not already captured by actions or document handler)
        if (screenshot && !screenshotBuffer) {
            screenshotBuffer = await page.screenshot({
                fullPage: screenshotFullPage,
                type: 'png'
            });
        }
        // If keepPageOpen, return page/browser for caller to use (e.g., branding extraction)
        if (keepPageOpen && page) {
            // From here on the caller owns the page/browser; skip cleanup in `finally`.
            handedOffToCaller = true;
            return {
                html,
                buffer: fetchBuffer,
                url: finalUrl,
                statusCode: fetchStatusCode,
                contentType: fetchContentType,
                screenshot: screenshotBuffer,
                page,
                // Use ownedBrowser for proxy case, otherwise the shared browser
                browser: ownedBrowser ?? browser,
                ...(fetchAutoInteract !== undefined ? { autoInteract: fetchAutoInteract } : {}),
            };
        }
        return {
            html,
            buffer: fetchBuffer,
            url: finalUrl,
            statusCode: fetchStatusCode,
            contentType: fetchContentType,
            screenshot: screenshotBuffer,
            ...(fetchAutoInteract !== undefined ? { autoInteract: fetchAutoInteract } : {}),
        };
    }
    catch (error) {
        // Project error types and aborts pass through unchanged; everything else
        // is normalised to TimeoutError or NetworkError.
        if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError) {
            throw error;
        }
        if (error instanceof Error && error.name === 'AbortError') {
            throw error;
        }
        if (error instanceof Error && error.message.includes('Timeout')) {
            throw new TimeoutError(`Browser navigation timed out`);
        }
        throw new NetworkError(`Browser fetch failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
    finally {
        try {
            if (signal && abortHandler) {
                signal.removeEventListener('abort', abortHandler);
            }
            // CRITICAL: Always release/close the page unless it was handed to the caller
            // (keepPageOpen and no error).
            if (page && !handedOffToCaller) {
                if (usingPooledPage) {
                    await recyclePooledPage(page);
                }
                else if (ownedContext) {
                    // Close the owned context (also closes the page)
                    await ownedContext.close().catch(() => { });
                }
                else if (!usingProfileBrowser) {
                    // Profile browser pages are NOT closed — the profile browser stays alive
                    // so that the next fetch in the same process reuses the session.
                    await page.close().catch(() => { });
                }
            }
            // Close the dedicated proxy browser if one was launched (not when it was handed to the caller)
            if (ownedBrowser && !handedOffToCaller) {
                await ownedBrowser.close().catch(() => { });
            }
        }
        finally {
            // Free the concurrency slot even if cleanup itself throws.
            activePagesCount--;
        }
    }
}
658
+ // ── browserScreenshot ─────────────────────────────────────────────────────────
659
+ /**
660
+ * Capture a screenshot of a URL using headless Chromium via Playwright.
661
+ */
662
+ export async function browserScreenshot(url, options = {}) {
663
+ // SECURITY: Validate URL to prevent SSRF
664
+ validateUrl(url);
665
+ const { fullPage = false, width, height, format = 'png', quality, waitMs = 0, timeoutMs = 30000, userAgent, headers, cookies, stealth = false, actions, scrollThrough = false, selector, } = options;
666
+ const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
667
+ // Basic validation
668
+ if (waitMs < 0 || waitMs > 60000) {
669
+ throw new WebPeelError('Wait time must be between 0 and 60000ms');
670
+ }
671
+ if (timeoutMs < 1000 || timeoutMs > 120000) {
672
+ throw new WebPeelError('Timeout must be between 1000 and 120000ms');
673
+ }
674
+ if (width !== undefined && (!Number.isFinite(width) || width < 100 || width > 5000)) {
675
+ throw new WebPeelError('Width must be between 100 and 5000');
676
+ }
677
+ if (height !== undefined && (!Number.isFinite(height) || height < 100 || height > 5000)) {
678
+ throw new WebPeelError('Height must be between 100 and 5000');
679
+ }
680
+ if (format !== 'png' && format !== 'jpeg') {
681
+ throw new WebPeelError('Format must be png or jpeg');
682
+ }
683
+ if (format === 'jpeg' && quality !== undefined) {
684
+ if (!Number.isFinite(quality) || quality < 1 || quality > 100) {
685
+ throw new WebPeelError('JPEG quality must be between 1 and 100');
686
+ }
687
+ }
688
+ // SECURITY: Validate custom headers if provided
689
+ if (headers) {
690
+ for (const [key, value] of Object.entries(headers)) {
691
+ if (key.toLowerCase() === 'host') {
692
+ throw new WebPeelError('Custom Host header is not allowed');
693
+ }
694
+ if (typeof value !== 'string' || value.length > 500) {
695
+ throw new WebPeelError('Invalid header value');
696
+ }
697
+ }
698
+ }
699
+ // SECURITY: Limit concurrent browser pages with timeout
700
+ const queueStartTime = Date.now();
701
+ const QUEUE_TIMEOUT_MS = 30000;
702
+ while (activePagesCount >= MAX_CONCURRENT_PAGES) {
703
+ if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
704
+ throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
705
+ }
706
+ await new Promise(resolve => setTimeout(resolve, 100));
707
+ }
708
+ activePagesCount++;
709
+ let page = null;
710
+ let usingPooledPage = false;
711
+ try {
712
+ const browser = stealth ? await getStealthBrowser() : await getBrowser();
713
+ const shouldUsePagePool = !stealth && !userAgent;
714
+ if (shouldUsePagePool) {
715
+ page = takePooledPage();
716
+ usingPooledPage = !!page;
717
+ if (usingPooledPage && getPooledPagesCount() < PAGE_POOL_SIZE) {
718
+ void ensurePagePool(browser).catch(() => { });
719
+ }
720
+ }
721
+ if (!page) {
722
+ page = await browser.newPage({
723
+ userAgent: validatedUserAgent,
724
+ viewport: width || height ? {
725
+ width: width || 1280,
726
+ height: height || 720,
727
+ } : null, // Use browser window size when no explicit dimensions requested
728
+ });
729
+ await applyStealthScripts(page);
730
+ usingPooledPage = false;
731
+ }
732
+ else {
733
+ await page.setViewportSize({
734
+ width: width || 1280,
735
+ height: height || 720,
736
+ }).catch(() => { });
737
+ }
738
+ await page.unroute('**/*').catch(() => { });
739
+ const mergedHeaders = { ...(headers || {}) };
740
+ if (usingPooledPage) {
741
+ mergedHeaders['User-Agent'] = validatedUserAgent;
742
+ }
743
+ if (usingPooledPage || Object.keys(mergedHeaders).length > 0) {
744
+ await page.setExtraHTTPHeaders(mergedHeaders);
745
+ }
746
+ if (cookies && cookies.length > 0) {
747
+ const parsedCookies = cookies.map(cookie => {
748
+ const [nameValue] = cookie.split(';').map(s => s.trim());
749
+ const [name, value] = nameValue.split('=');
750
+ if (!name || value === undefined) {
751
+ throw new WebPeelError(`Invalid cookie format: ${cookie}`);
752
+ }
753
+ return {
754
+ name: name.trim(),
755
+ value: value.trim(),
756
+ url,
757
+ };
758
+ });
759
+ await page.context().addCookies(parsedCookies);
760
+ }
761
+ // For screenshots, allow all resources
762
+ await page.route('**/*', (route) => route.continue());
763
+ let screenshotBuffer;
764
+ const doWork = (async () => {
765
+ try {
766
+ await page.goto(url, {
767
+ waitUntil: 'domcontentloaded',
768
+ timeout: timeoutMs,
769
+ });
770
+ }
771
+ catch (gotoError) {
772
+ const msg = gotoError?.message || String(gotoError);
773
+ if (/net::ERR_HTTP2_PROTOCOL_ERROR/i.test(msg)) {
774
+ throw new BlockedError(`Site blocked the request (HTTP/2 protocol error). The site likely has anti-bot protection. Try using stealth mode or a proxy.`);
775
+ }
776
+ if (/net::ERR_CONNECTION_REFUSED/i.test(msg)) {
777
+ throw new NetworkError(`Connection refused by the server at ${url}. The server may be down or blocking your IP.`);
778
+ }
779
+ if (/net::ERR_CONNECTION_RESET/i.test(msg)) {
780
+ throw new BlockedError(`Connection was reset by the server. This typically indicates anti-bot protection or IP blocking. Try using stealth mode or a different IP.`);
781
+ }
782
+ if (/net::ERR_SSL/i.test(msg)) {
783
+ throw new NetworkError(`SSL/TLS error connecting to site. URL: ${url}`);
784
+ }
785
+ if (/net::ERR_NAME_NOT_RESOLVED/i.test(msg)) {
786
+ throw new NetworkError(`Domain not found: ${url}`);
787
+ }
788
+ if (/net::ERR_CERT/i.test(msg)) {
789
+ throw new NetworkError(`SSL certificate error for ${url}`);
790
+ }
791
+ if (/NS_ERROR_NET_RESET/i.test(msg)) {
792
+ throw new NetworkError(`Connection reset (Firefox). The site may be blocking automated access. URL: ${url}`);
793
+ }
794
+ if (/timeout/i.test(msg)) {
795
+ throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
796
+ }
797
+ if (/net::ERR_/i.test(msg)) {
798
+ throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
799
+ }
800
+ throw gotoError;
801
+ }
802
+ if (waitMs > 0) {
803
+ await page.waitForTimeout(waitMs);
804
+ }
805
+ // Element-level screenshot (clip to a specific CSS selector)
806
+ if (selector) {
807
+ const count = await page.locator(selector).count();
808
+ if (count === 0)
809
+ throw new WebPeelError(`Element not found: ${selector}`);
810
+ const element = await page.locator(selector).first();
811
+ const buf = await element.screenshot({
812
+ type: format,
813
+ ...(format === 'jpeg' && typeof quality === 'number' ? { quality } : {}),
814
+ });
815
+ return { finalUrl: page.url(), screenshotBuffer: buf };
816
+ }
817
+ // Scroll through the page to trigger IntersectionObservers, lazy loading, animations
818
+ if (scrollThrough) {
819
+ await resilientScrollThrough(page, 250);
820
+ }
821
+ if (actions && actions.length > 0) {
822
+ const { executeActions } = await import('./actions.js');
823
+ const actionScreenshot = await executeActions(page, actions, {
824
+ fullPage,
825
+ type: format,
826
+ quality,
827
+ });
828
+ if (actionScreenshot) {
829
+ screenshotBuffer = actionScreenshot;
830
+ }
831
+ }
832
+ const finalUrl = page.url();
833
+ // Capture screenshot if not captured via actions
834
+ if (!screenshotBuffer) {
835
+ screenshotBuffer = await page.screenshot({
836
+ fullPage,
837
+ type: format,
838
+ ...(format === 'jpeg' && typeof quality === 'number' ? { quality } : {}),
839
+ });
840
+ }
841
+ return { finalUrl, screenshotBuffer: screenshotBuffer };
842
+ })();
843
+ let operationTimeout;
844
+ const timeoutPromise = new Promise((_, reject) => {
845
+ operationTimeout = setTimeout(() => reject(new TimeoutError(`Operation timed out after ${timeoutMs}ms`)), timeoutMs);
846
+ });
847
+ const { finalUrl, screenshotBuffer: buf } = await Promise.race([doWork, timeoutPromise]);
848
+ if (operationTimeout) {
849
+ clearTimeout(operationTimeout);
850
+ }
851
+ return { buffer: buf, finalUrl };
852
+ }
853
+ catch (error) {
854
+ if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError) {
855
+ throw error;
856
+ }
857
+ if (error instanceof Error && error.message.includes('Timeout')) {
858
+ throw new TimeoutError('Browser screenshot timed out');
859
+ }
860
+ throw new NetworkError(`Browser screenshot failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
861
+ }
862
+ finally {
863
+ if (page) {
864
+ if (usingPooledPage) {
865
+ await recyclePooledPage(page);
866
+ }
867
+ else {
868
+ await page.close().catch(() => { });
869
+ }
870
+ }
871
+ activePagesCount--;
872
+ }
873
+ }
874
+ // ── browserDiff ───────────────────────────────────────────────────────────────
875
/**
 * Capture screenshots of two URLs and compute a pixel-level visual diff.
 *
 * Both pages are captured concurrently as PNG through `browserScreenshot`.
 * If the decoded images differ in size, each one is placed in the top-left
 * corner of a zero-filled (transparent) canvas sized to the larger width and
 * the larger height before the two are compared with `pixelmatch`.
 *
 * @param {string} url1 - First page to capture.
 * @param {string} url2 - Second page to capture.
 * @param {object} [options]
 * @param {number} [options.width=1280] - Viewport width forwarded to browserScreenshot.
 * @param {number} [options.height=720] - Viewport height forwarded to browserScreenshot.
 * @param {boolean} [options.fullPage=false] - Forwarded to browserScreenshot.
 * @param {number} [options.threshold=0.1] - Passed to pixelmatch as its `threshold` option.
 * @param {boolean} [options.stealth=false] - Forwarded to browserScreenshot.
 * @param {number} [options.waitMs=0] - Forwarded to browserScreenshot.
 * @param {number} [options.timeoutMs=30000] - Forwarded to browserScreenshot.
 * @returns {Promise<{diffBuffer: Buffer, diffPixels: number, totalPixels: number, diffPercent: number, dimensions: {width: number, height: number}}>}
 *   The encoded diff PNG plus the mismatch statistics.
 */
export async function browserDiff(url1, url2, options = {}) {
    const { width = 1280, height = 720, fullPage = false, threshold = 0.1, stealth = false, waitMs = 0, timeoutMs = 30000, } = options;
    const BYTES_PER_PIXEL = 4; // RGBA
    // Take both screenshots as PNG (required for pixelmatch).
    // A fresh options object is built per call so the two captures never share state.
    const pngOptions = () => ({ width, height, fullPage, format: 'png', stealth, waitMs, timeoutMs });
    const [res1, res2] = await Promise.all([
        browserScreenshot(url1, pngOptions()),
        browserScreenshot(url2, pngOptions()),
    ]);
    // Dynamically import pngjs and pixelmatch (ESM-compatible)
    const { PNG } = await import('pngjs');
    const pixelmatch = (await import('pixelmatch')).default;
    const img1 = PNG.sync.read(res1.buffer);
    const img2 = PNG.sync.read(res2.buffer);
    // Use the larger of the two dimensions
    const outWidth = Math.max(img1.width, img2.width);
    const outHeight = Math.max(img1.height, img2.height);
    // Pad an image to the target size. Whole rows are copied at once instead of
    // pixel by pixel; the resulting bytes are identical to a per-pixel copy.
    const padImage = (img, targetW, targetH) => {
        if (img.width === targetW && img.height === targetH) {
            return img.data;
        }
        const padded = Buffer.alloc(targetW * targetH * BYTES_PER_PIXEL, 0);
        const rowCount = Math.min(img.height, targetH);
        const rowBytes = Math.min(img.width, targetW) * BYTES_PER_PIXEL;
        const srcStride = img.width * BYTES_PER_PIXEL;
        const dstStride = targetW * BYTES_PER_PIXEL;
        for (let y = 0; y < rowCount; y++) {
            const srcStart = y * srcStride;
            padded.set(img.data.subarray(srcStart, srcStart + rowBytes), y * dstStride);
        }
        return padded;
    };
    const data1 = padImage(img1, outWidth, outHeight);
    const data2 = padImage(img2, outWidth, outHeight);
    const diffData = Buffer.alloc(outWidth * outHeight * BYTES_PER_PIXEL);
    const diffPixels = pixelmatch(data1, data2, diffData, outWidth, outHeight, { threshold });
    const totalPixels = outWidth * outHeight;
    // Guard against a zero-area image so the percentage is never NaN.
    const diffPercent = totalPixels > 0 ? (diffPixels / totalPixels) * 100 : 0;
    const diffPng = new PNG({ width: outWidth, height: outHeight });
    diffPng.data = diffData;
    const diffBuffer = PNG.sync.write(diffPng);
    return {
        diffBuffer,
        diffPixels,
        totalPixels,
        diffPercent,
        dimensions: { width: outWidth, height: outHeight },
    };
}
928
+ // ── retryFetch ────────────────────────────────────────────────────────────────
929
/**
 * Retry an async operation with exponential backoff.
 *
 * `fn` is invoked up to `maxAttempts` times. After a failed attempt that is
 * not the last one, the function sleeps `baseDelayMs * 2^(attempt - 1)` ms
 * before trying again.
 *
 * @param {() => Promise<*>} fn - Operation to run; its resolved value is returned as-is.
 * @param {number} [maxAttempts=3] - Maximum number of invocations of `fn`.
 * @param {number} [baseDelayMs=1000] - Delay before the second attempt; doubles each retry.
 * @returns {Promise<*>} Whatever `fn` resolves to on the first successful attempt.
 * @throws {BlockedError|TimeoutError} Immediately, without retrying, when `fn` throws one of these.
 * @throws {Error} The last error thrown by `fn` once all attempts are used. A
 *   non-Error thrown value is wrapped in `Error('Unknown error')` with the
 *   original value preserved on `cause`.
 * @throws {NetworkError} When `fn` was never invoked (e.g. `maxAttempts` < 1).
 */
export async function retryFetch(fn, maxAttempts = 3, baseDelayMs = 1000) {
    let lastError = null;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            return await fn();
        }
        catch (error) {
            // Keep the original thrown value reachable even when it is not an Error,
            // so callers can still diagnose what actually went wrong.
            lastError = error instanceof Error ? error : new Error('Unknown error', { cause: error });
            // Don't retry on blocked errors or timeouts
            if (error instanceof BlockedError || error instanceof TimeoutError) {
                throw error;
            }
            if (attempt < maxAttempts) {
                const delay = baseDelayMs * Math.pow(2, attempt - 1);
                await new Promise((resolve) => setTimeout(resolve, delay));
            }
        }
    }
    throw lastError || new NetworkError('Retry failed');
}
952
+ // ── scrollAndWait ─────────────────────────────────────────────────────────────
953
/**
 * Repeatedly jump to the bottom of the document, giving the network a chance
 * to go quiet after every jump. Handy for pages that load more content on
 * scroll (infinite feeds, deferred images, and similar).
 *
 * @param page - Playwright Page instance.
 * @param times - How many scroll-then-wait rounds to run (default: 3).
 * @returns The page HTML as it stands once the last round has finished.
 */
export async function scrollAndWait(page, times = 3) {
    // One round of waiting: prefer the 'networkidle' load state (capped at 2 s);
    // if that wait fails, log it and pause for a fixed 1 s instead.
    const waitForQuietNetwork = async () => {
        try {
            await page.waitForLoadState('networkidle', { timeout: 2000 });
        }
        catch (e) {
            const reason = e instanceof Error ? e.message : e;
            log.debug('networkidle timeout, falling back to flat delay:', reason);
            await page.waitForTimeout(1000);
        }
    };
    let round = 0;
    while (round < times) {
        // The script is passed as a string and evaluated in the page.
        // eslint-disable-next-line @typescript-eslint/no-implied-eval
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
        await waitForQuietNetwork();
        round += 1;
    }
    return page.content();
}
978
+ // ── browserFilmstrip ──────────────────────────────────────────────────────────
979
/**
 * Capture multiple viewport screenshots at evenly distributed scroll positions.
 * Returns an array of Buffers (one per frame) together with the final page URL.
 *
 * @param {string} url - Page to capture; checked with `validateUrl` before any browser work.
 * @param {object} [options]
 * @param {number} [options.frames=6] - Requested frame count; rounded to an integer and clamped to 2..12.
 * @param {number} [options.width] - Viewport width (falls back to 1280).
 * @param {number} [options.height] - Viewport height (falls back to 720).
 * @param {string} [options.format='png'] - Screenshot type passed to `page.screenshot`.
 * @param {number} [options.quality] - Only forwarded when `format` is 'jpeg' and the value is a number.
 * @param {number} [options.waitMs=0] - Extra wait after navigation.
 * @param {number} [options.timeoutMs=30000] - Navigation timeout.
 * @param {string} [options.userAgent] - Custom user agent; a random one is used when omitted.
 * @param {object} [options.headers] - Extra HTTP headers for the page.
 * @param {string[]} [options.cookies] - Cookie strings of the form `name=value[; attributes]`.
 * @param {boolean} [options.stealth=false] - Use the stealth browser instead of the default one.
 * @returns {Promise<{frames: Buffer[], finalUrl: string}>}
 * @throws {WebPeelError} When a cookie string has no `=` or an empty name.
 * @throws {TimeoutError} When the page queue or the navigation times out.
 * @throws {NetworkError} For browser network errors and any other unexpected failure.
 */
export async function browserFilmstrip(url, options = {}) {
    validateUrl(url);
    const { frames: frameCount = 6, width, height, format = 'png', quality, waitMs = 0, timeoutMs = 30000, userAgent, headers, cookies, stealth = false, } = options;
    // Clamp frames between 2 and 12. The count is coerced to an integer first:
    // a fractional count would produce scroll positions past the end of the page,
    // and NaN would silently produce zero frames.
    const requestedFrames = Number(frameCount);
    const numFrames = Math.max(2, Math.min(12, Number.isFinite(requestedFrames) ? Math.round(requestedFrames) : 6));
    const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
    // Split "name=value; attr..." at the FIRST '=' only, so values that
    // themselves contain '=' (base64 padding, signed tokens) are kept intact.
    const parseCookie = (cookie) => {
        const [nameValue] = cookie.split(';').map((s) => s.trim());
        const separator = nameValue.indexOf('=');
        if (separator <= 0) {
            throw new WebPeelError(`Invalid cookie format: ${cookie}`);
        }
        return {
            name: nameValue.slice(0, separator).trim(),
            value: nameValue.slice(separator + 1).trim(),
            url,
        };
    };
    // Wait (polling every 100 ms, up to 30 s) for a free page slot.
    const queueStartTime = Date.now();
    const QUEUE_TIMEOUT_MS = 30000;
    while (activePagesCount >= MAX_CONCURRENT_PAGES) {
        if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
            throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
        }
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    activePagesCount++;
    let page = null;
    try {
        const browser = stealth ? await getStealthBrowser() : await getBrowser();
        page = await browser.newPage({
            userAgent: validatedUserAgent,
            viewport: { width: width || 1280, height: height || 720 },
        });
        await applyStealthScripts(page);
        if (headers)
            await page.setExtraHTTPHeaders(headers);
        if (cookies && cookies.length > 0) {
            await page.context().addCookies(cookies.map(parseCookie));
        }
        await page.route('**/*', (route) => route.continue());
        try {
            await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
        }
        catch (gotoError) {
            // Normalise navigation failures into the project's error types.
            const msg = gotoError?.message || String(gotoError);
            if (/timeout/i.test(msg)) {
                throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
            }
            if (/net::ERR_/i.test(msg)) {
                throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
            }
            throw gotoError;
        }
        if (waitMs > 0)
            await page.waitForTimeout(waitMs);
        // Wait a bit for initial animations
        await page.waitForTimeout(800);
        const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
        const viewportHeight = await page.evaluate(() => window.innerHeight);
        // A page shorter than the viewport has nothing to scroll; never go negative.
        const scrollRange = Math.max(0, scrollHeight - viewportHeight);
        const capturedFrames = [];
        // Calculate scroll positions (evenly distributed from top to bottom)
        const positions = [];
        for (let i = 0; i < numFrames; i++) {
            positions.push(Math.round(scrollRange * i / (numFrames - 1)));
        }
        for (const pos of positions) {
            await page.evaluate((y) => window.scrollTo({ top: y, behavior: 'instant' }), pos);
            await page.waitForTimeout(350); // Let animations settle
            const buf = await page.screenshot({
                type: format,
                ...(format === 'jpeg' && typeof quality === 'number' ? { quality } : {}),
            });
            capturedFrames.push(buf);
        }
        const finalUrl = page.url();
        return { frames: capturedFrames, finalUrl };
    }
    catch (error) {
        // Already-normalised errors pass through untouched.
        if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError) {
            throw error;
        }
        if (error instanceof Error && error.message.includes('Timeout')) {
            throw new TimeoutError('Browser filmstrip timed out');
        }
        throw new NetworkError(`Browser filmstrip failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
    finally {
        // Always release the page and the concurrency slot.
        if (page)
            await page.close().catch(() => { });
        activePagesCount--;
    }
}
1072
+ // ── withBrowserPage ───────────────────────────────────────────────────────────
1073
/**
 * Shared boilerplate for the 4 new screenshot functions:
 *   - Queue concurrency wait
 *   - Launch browser (stealth or normal)
 *   - Open a new page with viewport + userAgent
 *   - Apply stealth scripts
 *   - Set custom headers and cookies
 *   - Navigate to the URL (with error normalisation)
 *   - Wait optional extra time
 *   - Call `fn(page)` for the unique per-function logic
 *   - Always close the page and decrement the counter
 *
 * NOTE: Do NOT touch browserFetch / browserScreenshot / browserFilmstrip —
 * they have slightly different pooling / keep-open logic.
 *
 * @param {string} url - Page to open; checked with `validateUrl` first.
 * @param {object} opts
 * @param {number} [opts.width=1440] - Viewport width.
 * @param {number} [opts.height=900] - Viewport height.
 * @param {string} [opts.userAgent] - Custom user agent; a random one is used when omitted.
 * @param {object} [opts.headers] - Extra HTTP headers for the page.
 * @param {string[]} [opts.cookies] - Cookie strings of the form `name=value[; attributes]`.
 * @param {boolean} [opts.stealth=false] - Use the stealth browser instead of the default one.
 * @param {number} [opts.waitMs=0] - Extra wait after navigation, before `fn` runs.
 * @param {number} [opts.timeoutMs=60000] - Navigation timeout.
 * @param {(page: object) => Promise<*>} fn - Per-caller logic run against the loaded page.
 * @returns {Promise<{result: *, finalUrl: string}>} `fn`'s result and the page URL read after `fn` finished.
 * @throws {WebPeelError} When a cookie string has no `=` or an empty name.
 * @throws {TimeoutError} When the page queue or the navigation times out.
 * @throws {NetworkError} For browser network errors and any other unexpected failure.
 */
async function withBrowserPage(url, opts, fn) {
    validateUrl(url);
    const { width = 1440, height = 900, userAgent, headers, cookies, stealth = false, waitMs = 0, timeoutMs = 60000, } = opts;
    const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
    // Split "name=value; attr..." at the FIRST '=' only, so values that
    // themselves contain '=' (base64 padding, signed tokens) are kept intact.
    const parseCookie = (cookie) => {
        const [nameValue] = cookie.split(';').map((s) => s.trim());
        const separator = nameValue.indexOf('=');
        if (separator <= 0)
            throw new WebPeelError(`Invalid cookie format: ${cookie}`);
        return {
            name: nameValue.slice(0, separator).trim(),
            value: nameValue.slice(separator + 1).trim(),
            url,
        };
    };
    // Wait (polling every 100 ms, up to 30 s) for a free page slot.
    const queueStartTime = Date.now();
    const QUEUE_TIMEOUT_MS = 30000;
    while (activePagesCount >= MAX_CONCURRENT_PAGES) {
        if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
            throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
        }
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    activePagesCount++;
    let page = null;
    try {
        const browser = stealth ? await getStealthBrowser() : await getBrowser();
        page = await browser.newPage({
            userAgent: validatedUserAgent,
            viewport: { width, height },
        });
        await applyStealthScripts(page);
        if (headers)
            await page.setExtraHTTPHeaders(headers);
        if (cookies && cookies.length > 0) {
            await page.context().addCookies(cookies.map(parseCookie));
        }
        try {
            await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
        }
        catch (gotoError) {
            // Normalise navigation failures into the project's error types.
            const msg = gotoError?.message || String(gotoError);
            if (/timeout/i.test(msg))
                throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
            if (/net::ERR_/i.test(msg))
                throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
            throw gotoError;
        }
        if (waitMs > 0)
            await page.waitForTimeout(waitMs);
        const result = await fn(page);
        // Read the URL after fn so redirects or navigations it triggered are reflected.
        const finalUrl = page.url();
        return { result, finalUrl };
    }
    catch (error) {
        // Already-normalised errors pass through untouched.
        if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError)
            throw error;
        if (error instanceof Error && error.message.includes('Timeout'))
            throw new TimeoutError('Browser operation timed out');
        throw new NetworkError(`Browser operation failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
    finally {
        // Always release the page and the concurrency slot.
        if (page)
            await page.close().catch(() => { });
        activePagesCount--;
    }
}
1151
+ // ── browserAudit ──────────────────────────────────────────────────────────────
1152
/**
 * Section-by-section audit screenshots.
 *
 * Looks up every element matching `selector`, scrolls the window to the top
 * edge of each match in turn and takes a viewport screenshot there. The result
 * holds one entry per matched element: its index, tag, id, class name,
 * document-relative top, height and the captured image buffer.
 *
 * @param {string} url - Page to audit (opened through withBrowserPage).
 * @param {object} [options] - selector (default 'section'), format (default
 *   'jpeg'), quality (default 80), scrollThrough, plus the page options
 *   forwarded to withBrowserPage (width, height, userAgent, headers, cookies,
 *   stealth, waitMs, timeoutMs).
 * @returns {Promise<{frames: object[], finalUrl: string}>}
 */
export async function browserAudit(url, options = {}) {
    const { width = 1440, height = 900, format = 'jpeg', quality = 80, selector = 'section', waitMs = 0, timeoutMs = 60000, userAgent, headers, cookies, stealth = false, scrollThrough = false, } = options;
    const pageOptions = { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs };
    // Quality is only meaningful (and only forwarded) for JPEG output.
    const sendQuality = format === 'jpeg' && typeof quality === 'number';
    const buildShotOptions = () => (sendQuality ? { type: format, quality } : { type: format });
    const captureSections = async (page) => {
        // Optional pre-pass so lazily loaded content is present before measuring.
        if (scrollThrough) {
            await resilientScrollThrough(page, 200);
        }
        // Runs inside the page: describe every match and its document-relative top.
        const sections = await page.evaluate((sel) => {
            const matches = Array.from(document.querySelectorAll(sel));
            return matches.map((node) => {
                const box = node.getBoundingClientRect();
                const offsetY = window.scrollY || document.documentElement.scrollTop;
                return {
                    tag: node.tagName.toLowerCase(),
                    id: node.id || '',
                    className: node.className || '',
                    top: box.top + offsetY,
                    height: box.height,
                };
            });
        }, selector);
        const shots = [];
        let index = 0;
        for (const section of sections) {
            await page.evaluate((y) => window.scrollTo({ top: y, behavior: 'instant' }), section.top);
            await page.waitForTimeout(200);
            const buffer = await page.screenshot(buildShotOptions());
            shots.push({ index, ...section, buffer });
            index += 1;
        }
        return shots;
    };
    const { result, finalUrl } = await withBrowserPage(url, pageOptions, captureSections);
    return { frames: result, finalUrl };
}
1193
+ // ── browserAnimationCapture ───────────────────────────────────────────────────
1194
/**
 * Record a short sequence of viewport screenshots spaced `intervalMs` apart,
 * so successive states of a CSS animation can be inspected.
 *
 * Before recording, the viewport is positioned either on the first element
 * matching `selector` or, when no selector is given, at the numeric `scrollTo`
 * offset. The requested frame count is clamped to 1..30.
 *
 * @param {string} url - Page to capture (opened through withBrowserPage).
 * @param {object} [options] - frames (default 6), intervalMs (default 500),
 *   scrollTo, selector, format (default 'jpeg'), quality (default 80), plus the
 *   page options forwarded to withBrowserPage.
 * @returns {Promise<{frames: {index: number, timestampMs: number, buffer: Buffer}[], finalUrl: string}>}
 */
export async function browserAnimationCapture(url, options = {}) {
    const { frames: frameCount = 6, intervalMs = 500, scrollTo, selector, width = 1440, height = 900, format = 'jpeg', quality = 80, waitMs = 0, timeoutMs = 60000, userAgent, headers, cookies, stealth = false, } = options;
    const totalFrames = Math.max(1, Math.min(30, frameCount));
    const pageOptions = { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs };
    // Quality is only meaningful (and only forwarded) for JPEG output.
    const sendQuality = format === 'jpeg' && typeof quality === 'number';
    const buildShotOptions = () => (sendQuality ? { type: format, quality } : { type: format });
    const recordFrames = async (page) => {
        // Position the viewport: a selector wins over a numeric scroll offset.
        if (selector) {
            await page.evaluate((sel) => {
                const target = document.querySelector(sel);
                if (target)
                    target.scrollIntoView({ behavior: 'instant', block: 'start' });
            }, selector);
            await page.waitForTimeout(300);
        }
        else if (typeof scrollTo === 'number') {
            await page.evaluate((y) => window.scrollTo({ top: y, behavior: 'instant' }), scrollTo);
            await page.waitForTimeout(300);
        }
        const recorded = [];
        const startedAt = Date.now();
        let index = 0;
        while (index < totalFrames) {
            const buffer = await page.screenshot(buildShotOptions());
            recorded.push({ index, timestampMs: Date.now() - startedAt, buffer });
            index += 1;
            // Pause between frames, but not after the final one.
            if (index < totalFrames) {
                await page.waitForTimeout(intervalMs);
            }
        }
        return recorded;
    };
    const { result, finalUrl } = await withBrowserPage(url, pageOptions, recordFrames);
    return { frames: result, finalUrl };
}
1230
+ // ── browserViewports ──────────────────────────────────────────────────────────
1231
/**
 * Screenshot one page at several viewport sizes within a single page session.
 *
 * The page is opened at the first viewport's dimensions; it is then resized to
 * each requested viewport in order, given 500 ms to reflow, and captured.
 *
 * @param {string} url - Page to capture (opened through withBrowserPage).
 * @param {object} options - Must contain a non-empty `viewports` array of
 *   `{ width, height, label? }`. Also accepts fullPage, format (default 'jpeg'),
 *   quality (default 80), scrollThrough, and the page options forwarded to
 *   withBrowserPage (timeoutMs defaults to 90000 here).
 * @returns {Promise<{frames: {width: number, height: number, label: string, buffer: Buffer}[], finalUrl: string}>}
 * @throws {WebPeelError} When `viewports` is missing or empty.
 */
export async function browserViewports(url, options) {
    const { viewports, fullPage = false, format = 'jpeg', quality = 80, waitMs = 0, timeoutMs = 90000, userAgent, headers, cookies, stealth = false, scrollThrough = false, } = options;
    const hasViewports = Boolean(viewports) && viewports.length !== 0;
    if (!hasViewports) {
        throw new WebPeelError('At least one viewport is required');
    }
    // The first viewport doubles as the initial page size.
    const [initial] = viewports;
    const pageOptions = { width: initial.width, height: initial.height, userAgent, headers, cookies, stealth, waitMs, timeoutMs };
    // Quality is only meaningful (and only forwarded) for JPEG output.
    const sendQuality = format === 'jpeg' && typeof quality === 'number';
    const buildShotOptions = () => (sendQuality ? { fullPage, type: format, quality } : { fullPage, type: format });
    const captureEachViewport = async (page) => {
        const shots = [];
        for (const viewport of viewports) {
            const { width, height } = viewport;
            // Fall back to a "WIDTHxHEIGHT" label when none was supplied.
            const label = viewport.label || `${width}x${height}`;
            await page.setViewportSize({ width, height });
            await page.waitForTimeout(500); // Wait for reflow
            if (scrollThrough) {
                await resilientScrollThrough(page, 150);
            }
            const buffer = await page.screenshot(buildShotOptions());
            shots.push({ width, height, label, buffer });
        }
        return shots;
    };
    const { result, finalUrl } = await withBrowserPage(url, pageOptions, captureEachViewport);
    return { frames: result, finalUrl };
}
1263
+ /**
1264
+ * Extract computed CSS values and validate against design rules.
1265
+ * Returns structured JSON instead of pixel images.
1266
+ */
1267
+ export async function browserDesignAudit(url, options = {}) {
1268
+ const { rules = {}, selector = 'body', width = 1440, height = 900, waitMs = 0, timeoutMs = 60000, userAgent, headers, cookies, stealth = false, } = options;
1269
+ const spacingGrid = rules.spacingGrid ?? 2; // 2px base grid (accepts any even value; catches odd-pixel errors)
1270
+ const minTouchTarget = rules.minTouchTarget ?? 44;
1271
+ const minContrast = rules.minContrast ?? 4.5;
1272
+ const { result: auditData, finalUrl } = await withBrowserPage(url, { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs }, async (page) => {
1273
+ // Run design audit inside the browser
1274
+ return page.evaluate((params) => {
1275
+ const { sel, spacingGrid, minTouchTarget, minContrast } = params;
1276
+ // --- Helpers ---
1277
+ function parsePixels(val) {
1278
+ const n = parseFloat(val);
1279
+ return isNaN(n) ? 0 : n;
1280
+ }
1281
+ function parseRgb(color) {
1282
+ const m = color.match(/rgba?\(([0-9]+),\s*([0-9]+),\s*([0-9]+)/);
1283
+ if (!m)
1284
+ return null;
1285
+ return [parseInt(m[1]), parseInt(m[2]), parseInt(m[3])];
1286
+ }
1287
+ function parseRgba(color) {
1288
+ const m = color.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
1289
+ if (!m)
1290
+ return null;
1291
+ return [parseInt(m[1]), parseInt(m[2]), parseInt(m[3]), m[4] !== undefined ? parseFloat(m[4]) : 1];
1292
+ }
1293
+ function getEffectiveBackground(el) {
1294
+ let current = el;
1295
+ while (current && current !== document.documentElement) {
1296
+ const style = window.getComputedStyle(current);
1297
+ const bg = style.backgroundColor;
1298
+ const parsed = parseRgba(bg);
1299
+ if (parsed && parsed[3] > 0.5) {
1300
+ return [parsed[0], parsed[1], parsed[2]];
1301
+ }
1302
+ current = current.parentElement;
1303
+ }
1304
+ // Check html element
1305
+ const htmlStyle = window.getComputedStyle(document.documentElement);
1306
+ const htmlBg = parseRgba(htmlStyle.backgroundColor);
1307
+ if (htmlBg && htmlBg[3] > 0.5) {
1308
+ return [htmlBg[0], htmlBg[1], htmlBg[2]];
1309
+ }
1310
+ // Check body element
1311
+ const bodyStyle = window.getComputedStyle(document.body);
1312
+ const bodyBg = parseRgba(bodyStyle.backgroundColor);
1313
+ if (bodyBg && bodyBg[3] > 0.5) {
1314
+ return [bodyBg[0], bodyBg[1], bodyBg[2]];
1315
+ }
1316
+ // Check color-scheme CSS property or meta tag
1317
+ const colorScheme = htmlStyle.colorScheme ||
1318
+ document.querySelector('meta[name="color-scheme"]')?.getAttribute('content') || '';
1319
+ if (colorScheme.includes('dark')) {
1320
+ return [0, 0, 0]; // Dark scheme default
1321
+ }
1322
+ // Ultimate fallback: white (standard web default)
1323
+ return [255, 255, 255];
1324
+ }
1325
+ function hasBackdropFilter(el) {
1326
+ let current = el;
1327
+ while (current) {
1328
+ const style = window.getComputedStyle(current);
1329
+ const bf = style.backdropFilter;
1330
+ if (bf && bf !== 'none' && bf !== '')
1331
+ return true;
1332
+ current = current.parentElement;
1333
+ }
1334
+ return false;
1335
+ }
1336
+ function detectPageColorScheme() {
1337
+ const htmlStyle = window.getComputedStyle(document.documentElement);
1338
+ const htmlBg = parseRgba(htmlStyle.backgroundColor);
1339
+ if (htmlBg && htmlBg[3] > 0.5) {
1340
+ const lum = luminance(htmlBg[0], htmlBg[1], htmlBg[2]);
1341
+ return lum < 0.18 ? 'dark' : 'light';
1342
+ }
1343
+ const bodyStyle = window.getComputedStyle(document.body);
1344
+ const bodyBg = parseRgba(bodyStyle.backgroundColor);
1345
+ if (bodyBg && bodyBg[3] > 0.5) {
1346
+ const lum = luminance(bodyBg[0], bodyBg[1], bodyBg[2]);
1347
+ return lum < 0.18 ? 'dark' : 'light';
1348
+ }
1349
+ const colorScheme = htmlStyle.colorScheme ||
1350
+ document.querySelector('meta[name="color-scheme"]')?.getAttribute('content') || '';
1351
+ if (colorScheme.includes('dark'))
1352
+ return 'dark';
1353
+ if (colorScheme.includes('light'))
1354
+ return 'light';
1355
+ return 'unknown';
1356
+ }
1357
+ function luminance(r, g, b) {
1358
+ const [rs, gs, bs] = [r, g, b].map(c => {
1359
+ const s = c / 255;
1360
+ return s <= 0.03928 ? s / 12.92 : Math.pow((s + 0.055) / 1.055, 2.4);
1361
+ });
1362
+ return 0.2126 * rs + 0.7152 * gs + 0.0722 * bs;
1363
+ }
1364
+ function contrastRatio(c1, c2) {
1365
+ const l1 = luminance(...c1);
1366
+ const l2 = luminance(...c2);
1367
+ const lighter = Math.max(l1, l2);
1368
+ const darker = Math.min(l1, l2);
1369
+ return (lighter + 0.05) / (darker + 0.05);
1370
+ }
1371
+ function elementLabel(el) {
1372
+ const id = el.id ? `#${el.id}` : '';
1373
+ const cls = el.className && typeof el.className === 'string'
1374
+ ? '.' + el.className.trim().split(/\s+/).slice(0, 2).join('.')
1375
+ : '';
1376
+ return `${el.tagName.toLowerCase()}${id}${cls}`;
1377
+ }
1378
+ function nearestMultiple(val, grid) {
1379
+ if (grid <= 0)
1380
+ return val;
1381
+ return Math.round(val / grid) * grid;
1382
+ }
1383
+ const root = document.querySelector(sel) || document.body;
1384
+ const allElements = Array.from(root.querySelectorAll('*'));
1385
+ const spacingViolations = [];
1386
+ const touchTargetViolations = [];
1387
+ const contrastViolations = [];
1388
+ const fontSizesSet = new Set();
1389
+ const lineHeightsSet = new Set();
1390
+ const letterSpacingsSet = new Set();
1391
+ const spacingValuesSet = new Set();
1392
+ const interactiveTags = new Set(['a', 'button', 'input', 'select', 'textarea', 'label']);
1393
+ for (const el of allElements) {
1394
+ const style = window.getComputedStyle(el);
1395
+ const rect = el.getBoundingClientRect();
1396
+ // Skip invisible elements
1397
+ if (rect.width === 0 && rect.height === 0)
1398
+ continue;
1399
+ const label = elementLabel(el);
1400
+ // Spacing
1401
+ const spacingProps = ['marginTop', 'marginRight', 'marginBottom', 'marginLeft',
1402
+ 'paddingTop', 'paddingRight', 'paddingBottom', 'paddingLeft', 'gap', 'rowGap', 'columnGap'];
1403
+ for (const prop of spacingProps) {
1404
+ const raw = style[prop];
1405
+ if (!raw || raw === 'normal' || raw === 'auto')
1406
+ continue;
1407
+ const px = parsePixels(raw);
1408
+ if (px <= 0)
1409
+ continue;
1410
+ spacingValuesSet.add(px);
1411
+ if (spacingGrid > 0 && Math.round(px) % spacingGrid !== 0) {
1412
+ spacingViolations.push({
1413
+ element: label,
1414
+ property: prop,
1415
+ value: Math.round(px),
1416
+ nearestGridValue: nearestMultiple(px, spacingGrid),
1417
+ });
1418
+ }
1419
+ }
1420
+ // Typography
1421
+ const fs = style.fontSize;
1422
+ const lh = style.lineHeight;
1423
+ const ls = style.letterSpacing;
1424
+ if (fs)
1425
+ fontSizesSet.add(fs);
1426
+ if (lh && lh !== 'normal')
1427
+ lineHeightsSet.add(lh);
1428
+ if (ls && ls !== 'normal')
1429
+ letterSpacingsSet.add(ls);
1430
+ // Touch targets
1431
+ const tag = el.tagName.toLowerCase();
1432
+ if (interactiveTags.has(tag)) {
1433
+ const w = rect.width;
1434
+ const h = rect.height;
1435
+ if (w > 0 && h > 0 && (w < minTouchTarget || h < minTouchTarget)) {
1436
+ touchTargetViolations.push({ element: label, width: Math.round(w), height: Math.round(h), minRequired: minTouchTarget });
1437
+ }
1438
+ }
1439
+ // Contrast — Walk up DOM tree to find effective opaque background
1440
+ const textColor = style.color;
1441
+ if (textColor) {
1442
+ const fg = parseRgb(textColor);
1443
+ if (fg) {
1444
+ if (hasBackdropFilter(el)) {
1445
+ // Background can't be determined from CSS alone — mark as unresolvable
1446
+ // and exclude from scoring (bgResolved: false)
1447
+ const text = el.textContent?.trim() || '';
1448
+ if (text.length > 0 && text.length < 200) {
1449
+ contrastViolations.push({
1450
+ element: label,
1451
+ textColor,
1452
+ bgColor: 'unknown (backdrop-filter)',
1453
+ ratio: 0,
1454
+ required: minContrast,
1455
+ bgResolved: false,
1456
+ });
1457
+ }
1458
+ }
1459
+ else {
1460
+ const effectiveBg = getEffectiveBackground(el);
1461
+ // bgResolved: true — background was successfully determined via DOM traversal
1462
+ const ratio = contrastRatio(fg, effectiveBg);
1463
+ if (ratio > 1.05 && ratio < minContrast) {
1464
+ // Only flag elements with visible text content
1465
+ const text = el.textContent?.trim() || '';
1466
+ if (text.length > 0 && text.length < 200) {
1467
+ contrastViolations.push({
1468
+ element: label,
1469
+ textColor,
1470
+ bgColor: `rgb(${effectiveBg.join(',')})`,
1471
+ ratio: Math.round(ratio * 100) / 100,
1472
+ required: minContrast,
1473
+ bgResolved: true,
1474
+ });
1475
+ }
1476
+ }
1477
+ }
1478
+ }
1479
+ }
1480
+ }
1481
+ const spacingScale = Array.from(spacingValuesSet).sort((a, b) => a - b).map(v => Math.round(v));
1482
+ // ── WCAG Accessibility Audit ──────────────────────────────────────
1483
+ const a11yViolations = [];
1484
+ const headingStructure = [];
1485
+ // 1. Images without alt text
1486
+ const images = root.querySelectorAll('img');
1487
+ for (const img of Array.from(images)) {
1488
+ if (!img.getAttribute('alt') && !img.getAttribute('aria-label') && !img.getAttribute('role')?.includes('presentation')) {
1489
+ a11yViolations.push({ type: 'missing-alt', element: elementLabel(img), details: `src: ${(img.getAttribute('src') || '').slice(0, 80)}` });
1490
+ }
1491
+ }
1492
+ // 2. Form inputs without labels
1493
+ const inputs = root.querySelectorAll('input, select, textarea');
1494
+ for (const input of Array.from(inputs)) {
1495
+ const id = input.getAttribute('id');
1496
+ const hasLabel = id && document.querySelector(`label[for="${id}"]`);
1497
+ const hasAria = input.getAttribute('aria-label') || input.getAttribute('aria-labelledby');
1498
+ const hasTitle = input.getAttribute('title');
1499
+ if (!hasLabel && !hasAria && !hasTitle && input.getAttribute('type') !== 'hidden') {
1500
+ a11yViolations.push({ type: 'missing-label', element: elementLabel(input), details: `type: ${input.getAttribute('type') || 'text'}` });
1501
+ }
1502
+ }
1503
+ // 3. Heading hierarchy
1504
+ const headings = root.querySelectorAll('h1, h2, h3, h4, h5, h6');
1505
+ let prevLevel = 0;
1506
+ for (const h of Array.from(headings)) {
1507
+ const level = parseInt(h.tagName[1]);
1508
+ headingStructure.push(h.tagName.toLowerCase());
1509
+ if (prevLevel > 0 && level > prevLevel + 1) {
1510
+ a11yViolations.push({ type: 'heading-skip', element: elementLabel(h), details: `Jumped from h${prevLevel} to h${level}` });
1511
+ }
1512
+ prevLevel = level;
1513
+ }
1514
+ // 4. Empty links
1515
+ const links = root.querySelectorAll('a');
1516
+ for (const link of Array.from(links)) {
1517
+ const text = (link.textContent || '').trim();
1518
+ const aria = link.getAttribute('aria-label');
1519
+ const title = link.getAttribute('title');
1520
+ const hasImg = link.querySelector('img[alt]');
1521
+ if (!text && !aria && !title && !hasImg) {
1522
+ a11yViolations.push({ type: 'empty-link', element: elementLabel(link), details: `href: ${(link.getAttribute('href') || '').slice(0, 60)}` });
1523
+ }
1524
+ }
1525
+ // 5. Empty buttons
1526
+ const buttons = root.querySelectorAll('button');
1527
+ for (const btn of Array.from(buttons)) {
1528
+ const text = (btn.textContent || '').trim();
1529
+ const aria = btn.getAttribute('aria-label');
1530
+ if (!text && !aria) {
1531
+ a11yViolations.push({ type: 'empty-button', element: elementLabel(btn), details: '' });
1532
+ }
1533
+ }
1534
+ return {
1535
+ colorScheme: detectPageColorScheme(),
1536
+ spacingViolations: spacingViolations.slice(0, 50),
1537
+ touchTargetViolations: touchTargetViolations.slice(0, 50),
1538
+ contrastViolations: contrastViolations.slice(0, 50),
1539
+ typography: {
1540
+ fontSizes: Array.from(fontSizesSet).slice(0, 20),
1541
+ lineHeights: Array.from(lineHeightsSet).slice(0, 20),
1542
+ letterSpacings: Array.from(letterSpacingsSet).slice(0, 20),
1543
+ },
1544
+ spacingScale: [...new Set(spacingScale)].slice(0, 30),
1545
+ accessibilityViolations: a11yViolations.slice(0, 50),
1546
+ headingStructure,
1547
+ };
1548
+ }, { sel: selector, spacingGrid, minTouchTarget, minContrast });
1549
+ });
1550
+ // Weighted scoring: contrast failures are most serious (accessibility),
1551
+ // touch target issues affect usability, spacing is cosmetic, a11y is significant.
1552
+ // Only count contrast violations where we could resolve the background (bgResolved: true).
1553
+ // Violations with unresolvable backgrounds (backdrop-filter etc.) are excluded from scoring.
1554
+ const resolvedContrastViolations = auditData.contrastViolations.filter(v => v.bgResolved !== false);
1555
+ const unresolvedContrastViolations = auditData.contrastViolations.filter(v => v.bgResolved === false);
1556
+ const contrastPenalty = Math.min(40, resolvedContrastViolations.length * 5); // cap at 40pts
1557
+ const touchPenalty = Math.min(30, auditData.touchTargetViolations.length * 3); // cap at 30pts
1558
+ const spacingPenalty = Math.min(20, auditData.spacingViolations.length * 1);
1559
+ const a11yPenalty = Math.min(30, auditData.accessibilityViolations.length * 4);
1560
+ // Bonus for zero violations in a category (up to 5 pts total)
1561
+ let bonus = 0;
1562
+ if (resolvedContrastViolations.length === 0)
1563
+ bonus += 2;
1564
+ if (auditData.touchTargetViolations.length === 0)
1565
+ bonus += 1;
1566
+ if (auditData.accessibilityViolations.length === 0)
1567
+ bonus += 2;
1568
+ const totalPenalty = contrastPenalty + touchPenalty + spacingPenalty + a11yPenalty;
1569
+ const score = Math.min(100, Math.max(0, Math.round(100 - totalPenalty + bonus)));
1570
+ const parts = [];
1571
+ if (auditData.spacingViolations.length > 0)
1572
+ parts.push(`${auditData.spacingViolations.length} spacing violation(s)`);
1573
+ if (auditData.touchTargetViolations.length > 0)
1574
+ parts.push(`${auditData.touchTargetViolations.length} touch target violation(s)`);
1575
+ if (resolvedContrastViolations.length > 0)
1576
+ parts.push(`${resolvedContrastViolations.length} contrast violation(s)`);
1577
+ if (unresolvedContrastViolations.length > 0)
1578
+ parts.push(`${unresolvedContrastViolations.length} unresolvable contrast check(s)`);
1579
+ if (auditData.accessibilityViolations.length > 0)
1580
+ parts.push(`${auditData.accessibilityViolations.length} accessibility violation(s)`);
1581
+ const summary = parts.length === 0
1582
+ ? 'No design violations found.'
1583
+ : `Found: ${parts.join(', ')}.`;
1584
+ const audit = { score, summary, ...auditData };
1585
+ return { audit, finalUrl };
1586
+ }
1587
+ // ── browserDesignAnalysis ──────────────────────────────────────────────────────
1588
/**
 * Run the design-analysis extractor against a URL inside a browser page.
 *
 * The extractor (`extractDesignAnalysis` from `./design-analysis.js`) is
 * loaded lazily via dynamic import on each call, then invoked with the page
 * object that `withBrowserPage` hands to its callback. Per the extractor's
 * contract, the resulting DesignAnalysis describes effects, palette, layout,
 * type scale, and quality signals.
 *
 * @param {string} url - Address of the page to analyse.
 * @param {object} [options] - Browser/session settings forwarded to `withBrowserPage`.
 * @param {number} [options.width=1440] - Forwarded as `width` (presumably viewport width in px).
 * @param {number} [options.height=900] - Forwarded as `height` (presumably viewport height in px).
 * @param {number} [options.waitMs=0] - Forwarded as `waitMs`.
 * @param {number} [options.timeoutMs=60000] - Forwarded as `timeoutMs`.
 * @param {*} [options.userAgent] - Forwarded unchanged.
 * @param {*} [options.headers] - Forwarded unchanged.
 * @param {*} [options.cookies] - Forwarded unchanged.
 * @param {boolean} [options.stealth=false] - Forwarded as `stealth`.
 * @returns {Promise<{analysis: *, finalUrl: *}>} The extractor's result plus the
 *   `finalUrl` reported by `withBrowserPage`.
 */
export async function browserDesignAnalysis(url, options = {}) {
    const {
        width = 1440,
        height = 900,
        waitMs = 0,
        timeoutMs = 60000,
        userAgent,
        headers,
        cookies,
        stealth = false,
    } = options;
    // Lazy-load the extractor so the module is only pulled in when this entry point runs.
    const { extractDesignAnalysis } = await import('./design-analysis.js');
    const pageSettings = { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs };
    const runExtractor = async (page) => extractDesignAnalysis(page);
    const outcome = await withBrowserPage(url, pageSettings, runExtractor);
    const { result, finalUrl } = outcome;
    // Re-key `result` as `analysis` for callers.
    return { analysis: result, finalUrl };
}