@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,1600 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser-based fetching — uses Playwright via the browser pool.
|
|
3
|
+
* Handles browserFetch, browserScreenshot, retryFetch, and scrollAndWait.
|
|
4
|
+
*/
|
|
5
|
+
import { TimeoutError, BlockedError, NetworkError, WebPeelError } from '../types.js';
|
|
6
|
+
import { detectChallenge } from './challenge-detection.js';
|
|
7
|
+
import { getRealisticUserAgent } from './user-agents.js';
|
|
8
|
+
import { getRandomUserAgent, applyStealthScripts, takePooledPage, ensurePagePool, recyclePooledPage, getBrowser, getStealthBrowser, getStealthPlaywright, getProfileBrowser, PAGE_POOL_SIZE, MAX_CONCURRENT_PAGES, getPooledPagesCount, ANTI_DETECTION_ARGS, getRandomViewport, } from './browser-pool.js';
|
|
9
|
+
// Optional proprietary stealth hooks. The module is gitignored, so both
// bindings simply stay undefined whenever it cannot be loaded.
let applyStealthPatches;
let applyAcceptLanguageHeader;
try {
    // @ts-ignore — proprietary module, gitignored
    ({ applyStealthPatches, applyAcceptLanguageHeader } = await import('./stealth-patches.js'));
}
catch {
    // Not available in CI/open-source builds — continue without the hooks.
}
|
|
19
|
+
import { validateUrl, validateUserAgent, createAbortError } from './http-fetch.js';
|
|
20
|
+
import { autoInteract } from './auto-interact.js';
|
|
21
|
+
import { createLogger } from './logger.js';
|
|
22
|
+
// Module-scoped logger, created with the 'browser' label.
const log = createLogger('browser');
|
|
23
|
+
// ── Execution context error detection ─────────────────────────────────────────
|
|
24
|
+
/**
 * Check if an error indicates the page execution context was destroyed.
 * This happens on SPAs (like Polymarket) when scrolling triggers navigation.
 *
 * The message is compared case-insensitively against a list of known
 * fragments, so capitalised variants such as "Frame was detached" are
 * recognised as well as "Navigating frame was detached". The longer
 * "Target page, context or browser has been closed" wording (believed to be
 * what newer Playwright releases emit for a closed target) is covered too.
 *
 * @param {unknown} err - The thrown value. Non-Error values are stringified.
 * @returns {boolean} True when the message contains one of the known
 *   "context/target/frame/session is gone" fragments.
 */
function isContextDestroyedError(err) {
    // All markers are lowercase; the message is lowercased before matching.
    const markers = [
        'execution context was destroyed',
        'target closed',
        'target page, context or browser has been closed',
        'frame was detached',
        'session closed',
    ];
    const raw = err instanceof Error ? err.message : err;
    const msg = String(raw).toLowerCase();
    return markers.some((marker) => msg.includes(marker));
}
|
|
35
|
+
/**
 * Resilient scrollThrough helper — scrolls from top to bottom to trigger
 * IntersectionObservers, lazy loading, and animations.
 *
 * Handles SPAs gracefully: if the execution context is destroyed during scroll
 * (e.g., Polymarket navigation), logs a warning and stops — does NOT throw.
 *
 * Also tries inner scrollable containers (virtual scroll) before falling back
 * to window.scrollTo. Any other failure while scrolling an inner container is
 * ignored and the window-level scroll runs instead.
 *
 * @param {object} page - Page object; only `evaluate()` and `waitForTimeout()`
 *   are used here.
 * @param {number} [delayMs=250] - Pause after each scroll step. The pauses
 *   after reaching the bottom and after returning to the top are 1.6x and 2.4x
 *   this value.
 * @returns {Promise<void>}
 * @throws Re-throws any error from container detection or the window-level
 *   scroll that is not a "context destroyed" error.
 */
async function resilientScrollThrough(page, delayMs = 250) {
    // Used when the viewport height cannot produce a usable step. Arbitrary but
    // close to three quarters of a typical 800px-tall viewport.
    const FALLBACK_STEP_PX = 600;
    // Each step advances three quarters of a viewport. A step of 0 (viewport
    // height reported as 0) would make the scroll loops spin forever, so any
    // non-positive or NaN step is replaced by the fallback.
    const stepFor = (viewportHeight) => {
        const step = Math.round(viewportHeight * 0.75);
        return step > 0 ? step : FALLBACK_STEP_PX;
    };
    try {
        // Use shared container detection from actions module
        const { detectScrollContainer } = await import('./actions.js');
        const containerSelector = (await detectScrollContainer(page)) || null;
        if (containerSelector) {
            // Scroll inner container
            try {
                // Measured once up front; content that grows while scrolling is
                // only reached by the final jump to the bottom below.
                const scrollHeight = await page.evaluate((sel) => {
                    const el = document.querySelector(sel);
                    return el ? el.scrollHeight : document.body.scrollHeight;
                }, containerSelector);
                const vh = await page.evaluate(() => window.innerHeight);
                const step = stepFor(vh);
                for (let y = 0; y < scrollHeight; y += step) {
                    await page.evaluate(([sel, sy]) => {
                        const el = document.querySelector(sel);
                        if (el)
                            el.scrollTop = sy;
                    }, [containerSelector, y]);
                    await page.waitForTimeout(delayMs);
                }
                // Jump to the very bottom, then back to the top.
                await page.evaluate((sel) => {
                    const el = document.querySelector(sel);
                    if (el)
                        el.scrollTop = el.scrollHeight;
                }, containerSelector);
                await page.waitForTimeout(Math.round(delayMs * 1.6));
                await page.evaluate((sel) => {
                    const el = document.querySelector(sel);
                    if (el)
                        el.scrollTop = 0;
                }, containerSelector);
                await page.waitForTimeout(Math.round(delayMs * 2.4));
                return;
            }
            catch (innerErr) {
                if (isContextDestroyedError(innerErr)) {
                    log.warn('Execution context destroyed during inner container scroll — continuing with captured content');
                    return;
                }
                // Fall through to window scroll
            }
        }
        // Window-level scroll (standard path)
        const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
        const viewportHeight = await page.evaluate(() => window.innerHeight);
        const step = stepFor(viewportHeight);
        for (let y = 0; y < scrollHeight; y += step) {
            await page.evaluate((sy) => window.scrollTo({ top: sy, behavior: 'instant' }), y);
            await page.waitForTimeout(delayMs);
        }
        await page.evaluate(() => window.scrollTo({ top: document.body.scrollHeight, behavior: 'instant' }));
        await page.waitForTimeout(Math.round(delayMs * 1.6));
        await page.evaluate(() => window.scrollTo({ top: 0, behavior: 'instant' }));
        await page.waitForTimeout(Math.round(delayMs * 2.4));
    }
    catch (err) {
        if (isContextDestroyedError(err)) {
            log.warn('Execution context destroyed during scrollThrough — continuing with captured content');
            return;
        }
        // Re-throw unexpected errors
        throw err;
    }
}
|
|
109
|
+
// ── Concurrency state (owned by this module) ─────────────────────────────────
|
|
110
|
+
// Module-level counter, starting at 0. Presumably the number of pages
// currently in use — the code that updates it is outside this view (TODO confirm).
let activePagesCount = 0;
|
|
111
|
+
// ── browserFetch ──────────────────────────────────────────────────────────────
|
|
112
|
+
/**
 * Fetch using headless Chromium via Playwright
 * Slower but can handle JavaScript-heavy sites and bypass some bot detection
 *
 * Flow: validate inputs, wait for a free page slot (bounded by MAX_CONCURRENT_PAGES),
 * acquire a page (pooled page, dedicated proxy browser, isolated context, or a plain
 * new page), configure headers/cookies/resource blocking, navigate, wait for the DOM
 * text length to stabilise, run auto-interaction and optional actions, then return
 * the rendered HTML (or the raw body for PDF/DOCX responses).
 *
 * @param {string} url - Target URL; passed to validateUrl before anything else.
 * @param {object} [options]
 * @param {string} [options.userAgent] - Custom UA (validated); disables the page pool.
 * @param {number} [options.waitMs=0] - Extra wait after load, 0..60000 ms.
 * @param {number} [options.timeoutMs=30000] - Navigation and whole-operation timeout.
 * @param {boolean} [options.screenshot=false] - Capture a PNG screenshot.
 * @param {boolean} [options.screenshotFullPage=false] - Full-page screenshot.
 * @param {Object<string,string>} [options.headers] - Extra HTTP headers (no Host, values <= 500 chars).
 * @param {string[]} [options.cookies] - Cookies as "name=value[; attrs]" strings; attributes are ignored.
 * @param {boolean} [options.stealth=false] - Use the stealth browser and stealth patches.
 * @param {Array} [options.actions] - Page actions forwarded to executeActions.
 * @param {boolean} [options.keepPageOpen=false] - On success, return the open page/browser to the caller.
 * @param {AbortSignal} [options.signal] - Aborting closes the page and rejects the call.
 * @param {string} [options.profileDir] - Use a persistent profile browser.
 * @param {boolean} [options.headed=false] - Forwarded to getProfileBrowser.
 * @param {*} [options.storageState] - Storage state injected into an isolated context.
 * @param {string} [options.proxy] - Proxy URL; launches a dedicated browser.
 * @param {string} [options.device='desktop'] - 'desktop' | 'mobile' | 'tablet' emulation profile.
 * @param {number} [options.viewportWidth] - Overrides the device profile width.
 * @param {number} [options.viewportHeight] - Overrides the device profile height.
 * @param {number} [options.deviceScaleFactor] - Overrides the device profile scale factor.
 * @param {string} [options.waitUntil] - Playwright waitUntil value (default 'domcontentloaded').
 * @param {string} [options.waitSelector] - CSS selector to wait for after navigation (best effort).
 * @param {string[]} [options.blockResources] - Resource types to abort; replaces the default blocklist.
 * @param {boolean} [options.isSPA=false] - Use the longer DOM-stability wait.
 * @param {string[]} [options.languages] - Preferred languages; the first one becomes the context locale.
 * @returns {Promise<object>} `{ html, buffer, url, statusCode, contentType, screenshot }`, plus
 *   `autoInteract` when available and `page`/`browser` when keepPageOpen is set.
 * @throws {WebPeelError} On invalid options, invalid cookies or an oversized response.
 * @throws {TimeoutError} On queue, navigation or whole-operation timeout.
 * @throws {BlockedError} When the site blocks the request or serves a challenge page.
 * @throws {NetworkError} On any other browser/network failure.
 * @throws The error produced by createAbortError() when `signal` is aborted.
 */
export async function browserFetch(url, options = {}) {
    // SECURITY: Validate URL to prevent SSRF
    validateUrl(url);
    const {
        userAgent,
        waitMs = 0,
        timeoutMs = 30000,
        screenshot = false,
        screenshotFullPage = false,
        headers,
        cookies,
        stealth = false,
        actions,
        keepPageOpen = false,
        signal,
        profileDir,
        headed = false,
        storageState,
        proxy,
        device = 'desktop',
        viewportWidth: optViewportWidth,
        viewportHeight: optViewportHeight,
        deviceScaleFactor: optDeviceScaleFactor,
        waitUntil: optWaitUntil,
        waitSelector,
        blockResources,
        isSPA = false,
        languages,
    } = options;
    // Device emulation profiles (with deviceScaleFactor for crisp screenshots)
    const deviceProfiles = {
        desktop: { width: 1920, height: 1080, deviceScaleFactor: 1, userAgent: undefined },
        mobile: {
            width: 390,
            height: 844,
            deviceScaleFactor: 3,
            userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
        },
        tablet: {
            width: 820,
            height: 1180,
            deviceScaleFactor: 2,
            userAgent: 'Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
        },
    };
    // Unknown device names fall back to the desktop profile.
    const deviceProfile = deviceProfiles[device] ?? deviceProfiles.desktop;
    const effectiveViewportWidth = optViewportWidth ?? deviceProfile.width;
    const effectiveViewportHeight = optViewportHeight ?? deviceProfile.height;
    const effectiveScaleFactor = optDeviceScaleFactor ?? deviceProfile.deviceScaleFactor;
    const effectiveWaitUntil = optWaitUntil || 'domcontentloaded';
    // Validate user agent if provided
    // In stealth mode with no custom UA, always use a realistic Chrome UA
    const validatedUserAgent = userAgent
        ? validateUserAgent(userAgent)
        : (stealth ? getRealisticUserAgent() : getRandomUserAgent());
    // Validate wait time
    if (waitMs < 0 || waitMs > 60000) {
        throw new WebPeelError('Wait time must be between 0 and 60000ms');
    }
    if (signal?.aborted) {
        throw createAbortError();
    }
    // SECURITY: Validate custom headers if provided
    if (headers) {
        for (const [key, value] of Object.entries(headers)) {
            // Block Host header override
            if (key.toLowerCase() === 'host') {
                throw new WebPeelError('Custom Host header is not allowed');
            }
            if (typeof value !== 'string' || value.length > 500) {
                throw new WebPeelError('Invalid header value');
            }
        }
    }
    // SECURITY: Limit concurrent browser pages with timeout
    const queueStartTime = Date.now();
    const QUEUE_TIMEOUT_MS = 30000; // 30 second max wait
    while (activePagesCount >= MAX_CONCURRENT_PAGES) {
        if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
            throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
        }
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    activePagesCount++;
    let page = null;
    let usingPooledPage = false;
    let abortHandler;
    // Set only once the result is ready to be handed back. With keepPageOpen the
    // caller takes ownership of the page on success; on failure nobody else holds
    // a reference, so the finally block must still clean up.
    let succeeded = false;
    // Declared here (outside try) so the finally block can reference it
    const usingProfileBrowser = !!profileDir;
    // Owned context created when storageState injection is requested
    let ownedContext;
    // Owned browser launched when proxy is specified (dedicated browser with proxy at launch level)
    let ownedBrowser;
    try {
        const browser = usingProfileBrowser
            ? await getProfileBrowser(profileDir, headed, stealth)
            : stealth
                ? await getStealthBrowser()
                : await getBrowser();
        // Only use the shared page pool for non-stealth, non-profile, non-keepOpen, non-storageState, non-proxy fetches
        const shouldUsePagePool = !stealth && !userAgent && !keepPageOpen && !usingProfileBrowser && !storageState && !proxy;
        if (shouldUsePagePool) {
            page = takePooledPage();
            usingPooledPage = !!page;
            if (usingPooledPage && getPooledPagesCount() < PAGE_POOL_SIZE) {
                // Refill the pool in the background; failures are non-fatal.
                void ensurePagePool(browser).catch(() => { });
            }
        }
        if (!page) {
            const effectiveLocale = (languages && languages.length > 0) ? languages[0] : 'en-US';
            const pageOptions = {
                userAgent: validatedUserAgent,
                // viewport: null lets the browser use its natural window size (set via --window-size),
                // avoiding the telltale Playwright default of 1280×720.
                viewport: null,
                ...(stealth
                    ? {
                        locale: effectiveLocale,
                        timezoneId: 'America/New_York',
                        javaScriptEnabled: true,
                    }
                    : {}),
            };
            if (proxy) {
                // Parse proxy URL to extract auth credentials for Playwright
                let playwrightProxy;
                try {
                    const proxyUrl = new URL(proxy);
                    playwrightProxy = {
                        server: `${proxyUrl.protocol}//${proxyUrl.host}`,
                        username: proxyUrl.username || undefined,
                        password: proxyUrl.password || undefined,
                    };
                }
                catch (e) {
                    // Fallback: use proxy string as-is
                    log.debug('proxy URL parse failed, using as-is:', e instanceof Error ? e.message : e);
                    playwrightProxy = { server: proxy };
                }
                // Launch a DEDICATED fresh browser with proxy at the launch level.
                // Context-level proxy is unreliable for anti-bot sites — they check the browser's
                // IP at connection time (set at launch), not at context creation.
                const pw = stealth ? await getStealthPlaywright() : (await import('playwright')).chromium;
                const vp = getRandomViewport();
                ownedBrowser = await pw.launch({
                    headless: true,
                    args: [...ANTI_DETECTION_ARGS, `--window-size=${vp.width},${vp.height}`],
                    proxy: playwrightProxy,
                });
                ownedContext = await ownedBrowser.newContext({
                    userAgent: validatedUserAgent || getRandomUserAgent(),
                    locale: effectiveLocale,
                    timezoneId: 'America/New_York',
                    javaScriptEnabled: true,
                    viewport: { width: effectiveViewportWidth || vp.width, height: effectiveViewportHeight || vp.height },
                    deviceScaleFactor: effectiveScaleFactor,
                    ...(storageState ? { storageState } : {}),
                });
                page = await ownedContext.newPage();
            }
            else if (storageState) {
                // Create an isolated context with the injected storage state (cookies + localStorage)
                ownedContext = await browser.newContext({
                    ...pageOptions,
                    storageState,
                    viewport: { width: effectiveViewportWidth, height: effectiveViewportHeight },
                    deviceScaleFactor: effectiveScaleFactor,
                });
                page = await ownedContext.newPage();
            }
            else {
                // When deviceScaleFactor differs from default (1), create an isolated context
                // so the scale factor is applied (Playwright requires it at context level)
                if (effectiveScaleFactor !== 1) {
                    ownedContext = await browser.newContext({
                        ...pageOptions,
                        viewport: { width: effectiveViewportWidth, height: effectiveViewportHeight },
                        deviceScaleFactor: effectiveScaleFactor,
                    });
                    page = await ownedContext.newPage();
                }
                else {
                    page = await browser.newPage(pageOptions);
                    // Apply viewport for device emulation or explicit viewport overrides
                    if (device !== 'desktop' || optViewportWidth !== undefined || optViewportHeight !== undefined) {
                        await page.setViewportSize({ width: effectiveViewportWidth, height: effectiveViewportHeight }).catch(() => { });
                    }
                }
            }
            await applyStealthScripts(page, languages);
            // Apply supplemental stealth patches (canvas noise, connection API, battery, etc.)
            // These go beyond what puppeteer-extra-plugin-stealth provides.
            if (stealth) {
                await applyStealthPatches?.(page);
                // NOTE(review): the Accept-Language value is hard-coded to 'en-US' even when
                // `languages` is supplied — confirm whether effectiveLocale was intended here.
                await applyAcceptLanguageHeader?.(page, 'en-US');
            }
            usingPooledPage = false;
        }
        else {
            // Pooled pages are reused, so resize them for this request (best effort).
            await page.setViewportSize({ width: effectiveViewportWidth, height: effectiveViewportHeight }).catch(() => { });
        }
        if (signal) {
            // Closing the page makes any in-flight Playwright call reject promptly.
            abortHandler = () => {
                if (page && !page.isClosed()) {
                    void page.close().catch(() => { });
                }
            };
            signal.addEventListener('abort', abortHandler, { once: true });
        }
        // Drop any route handlers left over from a previous use of a pooled page.
        await page.unroute('**/*').catch(() => { });
        const mergedHeaders = { ...(headers || {}) };
        if (usingPooledPage) {
            mergedHeaders['User-Agent'] = validatedUserAgent;
        }
        // Apply device user-agent (mobile/tablet) unless caller overrode userAgent
        if (deviceProfile.userAgent && !userAgent) {
            mergedHeaders['User-Agent'] = deviceProfile.userAgent;
        }
        if (usingPooledPage || Object.keys(mergedHeaders).length > 0) {
            await page.setExtraHTTPHeaders(mergedHeaders);
        }
        // Set cookies if provided
        if (cookies && cookies.length > 0) {
            const parsedCookies = cookies.map(cookie => {
                // Only the leading "name=value" pair is used; attributes after ';' are ignored.
                const [nameValue] = cookie.split(';').map(s => s.trim());
                // Split on the FIRST '=' only: cookie values may legitimately contain '='
                // (base64 padding, nested key=value tokens) and must not be truncated.
                const separatorIndex = nameValue.indexOf('=');
                if (separatorIndex <= 0) {
                    throw new WebPeelError(`Invalid cookie format: ${cookie}`);
                }
                return {
                    name: nameValue.slice(0, separatorIndex).trim(),
                    value: nameValue.slice(separatorIndex + 1).trim(),
                    url,
                };
            });
            await page.context().addCookies(parsedCookies);
        }
        if (signal?.aborted) {
            throw createAbortError();
        }
        // Block resources: custom list takes precedence; otherwise use defaults in non-screenshot/non-stealth mode.
        // In stealth mode, blocking common resources can be a bot-detection signal.
        if (blockResources && blockResources.length > 0) {
            const blockedTypes = new Set(blockResources);
            await page.route('**/*', (route) => {
                const resourceType = route.request().resourceType();
                if (blockedTypes.has(resourceType)) {
                    route.abort();
                }
                else {
                    route.continue();
                }
            });
        }
        else if (screenshot) {
            // Screenshots need all resources (images, CSS) for visual accuracy
            await page.route('**/*', (route) => route.continue());
        }
        else {
            // Default: block images/fonts/media for speed + bandwidth savings.
            // In stealth mode, we keep stylesheets (blocking CSS is a bot signal)
            // but still block images/fonts/media (saves ~70% bandwidth per page).
            const blocklist = stealth
                ? ['image', 'font', 'media'] // stealth: keep CSS, block heavy assets
                : ['image', 'font', 'media', 'stylesheet']; // normal: block everything non-text
            await page.route('**/*', (route) => {
                const resourceType = route.request().resourceType();
                if (blocklist.includes(resourceType)) {
                    route.abort();
                }
                else {
                    route.continue();
                }
            });
        }
        // SECURITY: Wrap entire operation in timeout
        let screenshotBuffer;
        const throwIfAborted = () => {
            if (signal?.aborted) {
                throw createAbortError();
            }
        };
        const fetchPromise = (async () => {
            let response;
            try {
                response = await page.goto(url, {
                    waitUntil: effectiveWaitUntil,
                    timeout: timeoutMs,
                });
            }
            catch (gotoError) {
                // Translate low-level browser errors into the module's typed errors.
                const msg = gotoError?.message || String(gotoError);
                if (/net::ERR_HTTP2_PROTOCOL_ERROR/i.test(msg)) {
                    throw new BlockedError(`Site blocked the request (HTTP/2 protocol error). The site likely has anti-bot protection. Try using stealth mode or a proxy.`);
                }
                if (/net::ERR_CONNECTION_REFUSED/i.test(msg)) {
                    throw new NetworkError(`Connection refused by the server at ${url}. The server may be down or blocking your IP.`);
                }
                if (/net::ERR_CONNECTION_RESET/i.test(msg)) {
                    throw new BlockedError(`Connection was reset by the server. This typically indicates anti-bot protection or IP blocking. Try using stealth mode or a different IP.`);
                }
                if (/net::ERR_SSL/i.test(msg)) {
                    throw new NetworkError(`SSL/TLS error connecting to site. URL: ${url}`);
                }
                if (/net::ERR_NAME_NOT_RESOLVED/i.test(msg)) {
                    throw new NetworkError(`Domain not found: ${url}`);
                }
                if (/net::ERR_CERT/i.test(msg)) {
                    throw new NetworkError(`SSL certificate error for ${url}`);
                }
                if (/NS_ERROR_NET_RESET/i.test(msg)) {
                    throw new NetworkError(`Connection reset (Firefox). The site may be blocking automated access. URL: ${url}`);
                }
                if (/timeout/i.test(msg)) {
                    throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
                }
                if (/net::ERR_/i.test(msg)) {
                    throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
                }
                throw gotoError;
            }
            throwIfAborted();
            // Wait for a specific CSS selector if requested
            if (waitSelector) {
                await page.waitForSelector(waitSelector, { timeout: timeoutMs }).catch(() => {
                    log.debug(`waitSelector "${waitSelector}" not found within timeout`);
                });
                throwIfAborted();
            }
            // Quick check: if body text is very thin, wait for JS to render more content.
            // Only adds latency when the page clearly hasn't loaded yet.
            // eslint-disable-next-line @typescript-eslint/no-implied-eval
            const bodyTextLength = await page.evaluate('document.body?.innerText?.trim().length || 0').catch(() => 0);
            if (bodyTextLength < 500) {
                await page.waitForLoadState('networkidle', { timeout: 1500 }).catch(() => { });
                throwIfAborted();
            }
            // DOM stability check: wait for SPA hydration to settle.
            // Polls innerText length every 500ms — if still growing, keep waiting.
            // SPAs (Kayak, Google Flights, Expedia) get a longer timeout to allow async data loads.
            {
                const stabilityStart = Date.now();
                // SPA sites (Kayak, Google Flights, Expedia) need up to 12s for results to load.
                // Normal rendered pages need just 3s extra.
                const MAX_STABILITY_WAIT_MS = isSPA ? 12000 : 3000;
                // SPA: must be stable for 2s (4 consecutive 500ms checks). Normal: 1s (2 checks).
                const STABLE_CHECKS_REQUIRED = isSPA ? 4 : 2;
                const POLL_INTERVAL_MS = 500;
                const MIN_CONTENT_LENGTH = 200; // Don't consider near-empty pages stable
                let prevLength = await page.evaluate('document.body?.innerText?.length || 0').catch(() => 0);
                let stableCount = 0;
                while (Date.now() - stabilityStart < MAX_STABILITY_WAIT_MS) {
                    throwIfAborted();
                    await page.waitForTimeout(POLL_INTERVAL_MS);
                    const curLength = await page.evaluate('document.body?.innerText?.length || 0').catch(() => 0);
                    if (curLength !== prevLength) {
                        // Content changed — reset stability counter
                        stableCount = 0;
                    }
                    else if (curLength >= MIN_CONTENT_LENGTH) {
                        stableCount++;
                        if (stableCount >= STABLE_CHECKS_REQUIRED) {
                            break; // stable long enough
                        }
                    }
                    prevLength = curLength;
                }
                if (isSPA) {
                    log.debug(`SPA stability check: ${Date.now() - stabilityStart}ms, length=${prevLength}`);
                }
            }
            const finalUrl = page.url();
            const contentType = response?.headers()?.['content-type'] || '';
            const contentTypeLower = contentType.toLowerCase();
            const urlLower = finalUrl.toLowerCase();
            const isPdf = contentTypeLower.includes('application/pdf') || urlLower.endsWith('.pdf');
            const isDocx = contentTypeLower.includes('wordprocessingml.document') || urlLower.endsWith('.docx');
            // Binary handling needs the response object to read the raw body from.
            const isBinaryDoc = !!response && (isPdf || isDocx);
            // Small randomized delay in stealth mode (simulate human behavior)
            // Keep it short — enough to look human, not enough to kill latency
            if (stealth) {
                const extraDelayMs = 200 + Math.floor(Math.random() * 601);
                await page.waitForTimeout(extraDelayMs);
                throwIfAborted();
            }
            // Human-like delay for proxied requests (helps bypass bot detection on strict sites)
            if (proxy) {
                // Realistic human behavior to bypass behavioral analysis
                const humanDelay = 800 + Math.random() * 1200;
                await page.waitForTimeout(humanDelay);
                throwIfAborted();
                // Realistic mouse movement (simulate human cursor)
                try {
                    const vw = await page.evaluate(() => window.innerWidth);
                    const vh = await page.evaluate(() => window.innerHeight);
                    await page.mouse.move(100 + Math.random() * (vw - 200), 100 + Math.random() * (vh - 200), { steps: 5 + Math.floor(Math.random() * 10) });
                    // Small scroll to trigger lazy-loaded content
                    await page.evaluate(() => window.scrollBy(0, 200 + Math.random() * 400));
                    await page.waitForTimeout(300 + Math.random() * 500);
                    throwIfAborted();
                    // Second mouse move
                    await page.mouse.move(50 + Math.random() * (vw - 100), 50 + Math.random() * (vh - 100), { steps: 3 + Math.floor(Math.random() * 5) });
                }
                catch {
                    // Non-fatal: mouse/scroll simulation failed
                }
                throwIfAborted();
            }
            // Wait for additional time if requested (for dynamic content / screenshots)
            if (waitMs > 0) {
                await page.waitForTimeout(waitMs);
                throwIfAborted();
            }
            // Auto-interact: dismiss cookie banners, consent popups, overlays
            // before content extraction. Runs before user-specified actions so
            // that popups don't interfere with custom interactions.
            let autoInteractResult;
            if (!isBinaryDoc) {
                let autoInteractTimer;
                try {
                    autoInteractResult = await Promise.race([
                        autoInteract(page),
                        new Promise((resolve) => {
                            autoInteractTimer = setTimeout(() => resolve({ cookieBannerDismissed: false, consentHandled: false, loadMoreClicked: 0, overlaysDismissed: 0 }), 3500);
                        }),
                    ]);
                }
                catch {
                    // Never block extraction
                }
                finally {
                    // Don't leave the 3.5s fallback timer pending once the race is decided.
                    clearTimeout(autoInteractTimer);
                }
                throwIfAborted();
            }
            // Execute page actions if provided
            if (actions && actions.length > 0) {
                const { executeActions } = await import('./actions.js');
                const actionScreenshot = await executeActions(page, actions);
                if (actionScreenshot) {
                    screenshotBuffer = actionScreenshot;
                }
                throwIfAborted();
            }
            // If the navigation returned a binary document (PDF/DOCX), grab the raw body.
            if (isBinaryDoc) {
                const buffer = await response.body();
                throwIfAborted();
                // Capture screenshot if requested (and not already captured by actions)
                if (screenshot && !screenshotBuffer) {
                    screenshotBuffer = await page.screenshot({
                        fullPage: screenshotFullPage,
                        type: 'png',
                    });
                }
                return {
                    html: '',
                    finalUrl,
                    buffer,
                    contentType,
                    statusCode: response.status(),
                };
            }
            const html = await page.content();
            throwIfAborted();
            return {
                html,
                finalUrl,
                contentType,
                statusCode: response?.status(),
                autoInteractResult,
            };
        })();
        let operationTimeout;
        const timeoutPromise = new Promise((_, reject) => {
            operationTimeout = setTimeout(() => reject(new TimeoutError(`Operation timed out after ${timeoutMs}ms`)), timeoutMs);
        });
        let fetchData;
        try {
            fetchData = await Promise.race([fetchPromise, timeoutPromise]);
        }
        finally {
            // Clear on failure too, otherwise the timer keeps the event loop alive
            // for up to timeoutMs after the fetch has already rejected.
            clearTimeout(operationTimeout);
        }
        const { html, finalUrl } = fetchData;
        const fetchBuffer = 'buffer' in fetchData ? fetchData.buffer : undefined;
        const fetchContentType = 'contentType' in fetchData ? fetchData.contentType : undefined;
        const fetchStatusCode = 'statusCode' in fetchData ? fetchData.statusCode : undefined;
        const fetchAutoInteract = 'autoInteractResult' in fetchData ? fetchData.autoInteractResult : undefined;
        const isBinaryDoc = !!fetchBuffer;
        // SECURITY: Limit HTML size (skip for binary documents where html is empty)
        if (!isBinaryDoc) {
            if (html.length > 10 * 1024 * 1024) { // 10MB limit
                throw new WebPeelError('Response too large (max 10MB)');
            }
            if (!html || html.length < 100) {
                throw new BlockedError('Empty or suspiciously small response from browser.');
            }
            // Run challenge detection on browser-fetched HTML (covers both regular and stealth modes)
            // Note: skip empty-shell type — that's a rendering quality issue (SPA needs more JS time),
            // not a bot challenge. The caller's escalation logic handles empty-shell separately.
            const browserChallengeResult = detectChallenge(html, fetchStatusCode);
            if (browserChallengeResult.isChallenge && browserChallengeResult.type !== 'empty-shell') {
                throw new BlockedError(`Challenge page detected (${browserChallengeResult.type || 'unknown'}, confidence: ${browserChallengeResult.confidence.toFixed(2)}). ` +
                    `Site requires human verification. Try a different approach or use a CAPTCHA solving service.`);
            }
        }
        // Capture screenshot if requested (and not already captured by actions or document handler)
        if (screenshot && !screenshotBuffer) {
            screenshotBuffer = await page.screenshot({
                fullPage: screenshotFullPage,
                type: 'png'
            });
        }
        // From here on nothing can throw: ownership of an open page may pass to the caller.
        succeeded = true;
        // If keepPageOpen, return page/browser for caller to use (e.g., branding extraction)
        if (keepPageOpen && page) {
            return {
                html,
                buffer: fetchBuffer,
                url: finalUrl,
                statusCode: fetchStatusCode,
                contentType: fetchContentType,
                screenshot: screenshotBuffer,
                page,
                // Use ownedBrowser for proxy case, otherwise the shared browser
                browser: ownedBrowser ?? browser,
                ...(fetchAutoInteract !== undefined ? { autoInteract: fetchAutoInteract } : {}),
            };
        }
        return {
            html,
            buffer: fetchBuffer,
            url: finalUrl,
            statusCode: fetchStatusCode,
            contentType: fetchContentType,
            screenshot: screenshotBuffer,
            ...(fetchAutoInteract !== undefined ? { autoInteract: fetchAutoInteract } : {}),
        };
    }
    catch (error) {
        if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError) {
            throw error;
        }
        if (error instanceof Error && error.name === 'AbortError') {
            throw error;
        }
        // The abort handler closes the page, which makes the in-flight Playwright call
        // reject with a generic "target closed" error; surface that as an abort rather
        // than a network failure.
        if (signal?.aborted) {
            throw createAbortError();
        }
        if (error instanceof Error && error.message.includes('Timeout')) {
            throw new TimeoutError(`Browser navigation timed out`);
        }
        throw new NetworkError(`Browser fetch failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
    finally {
        try {
            if (signal && abortHandler) {
                signal.removeEventListener('abort', abortHandler);
            }
            // CRITICAL: Always release/close page and decrement counter (unless keepPageOpen and no error)
            const shouldRelease = !keepPageOpen || !succeeded;
            if (page && shouldRelease) {
                if (usingPooledPage) {
                    await recyclePooledPage(page);
                }
                else if (ownedContext) {
                    // Close the owned context (also closes the page)
                    await ownedContext.close().catch(() => { });
                }
                else if (!usingProfileBrowser) {
                    // Profile browser pages are NOT closed — the profile browser stays alive
                    // so that the next fetch in the same process reuses the session.
                    await page.close().catch(() => { });
                }
            }
            // Close the dedicated proxy browser if one was launched (not when the caller keeps the page)
            if (ownedBrowser && shouldRelease) {
                await ownedBrowser.close().catch(() => { });
            }
        }
        finally {
            // Free the concurrency slot even if cleanup itself throws.
            activePagesCount--;
        }
    }
}
|
|
658
|
+
// ── browserScreenshot ─────────────────────────────────────────────────────────
|
|
659
|
+
/**
|
|
660
|
+
* Capture a screenshot of a URL using headless Chromium via Playwright.
|
|
661
|
+
*/
|
|
662
|
+
export async function browserScreenshot(url, options = {}) {
|
|
663
|
+
// SECURITY: Validate URL to prevent SSRF
|
|
664
|
+
validateUrl(url);
|
|
665
|
+
const { fullPage = false, width, height, format = 'png', quality, waitMs = 0, timeoutMs = 30000, userAgent, headers, cookies, stealth = false, actions, scrollThrough = false, selector, } = options;
|
|
666
|
+
const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
|
|
667
|
+
// Basic validation
|
|
668
|
+
if (waitMs < 0 || waitMs > 60000) {
|
|
669
|
+
throw new WebPeelError('Wait time must be between 0 and 60000ms');
|
|
670
|
+
}
|
|
671
|
+
if (timeoutMs < 1000 || timeoutMs > 120000) {
|
|
672
|
+
throw new WebPeelError('Timeout must be between 1000 and 120000ms');
|
|
673
|
+
}
|
|
674
|
+
if (width !== undefined && (!Number.isFinite(width) || width < 100 || width > 5000)) {
|
|
675
|
+
throw new WebPeelError('Width must be between 100 and 5000');
|
|
676
|
+
}
|
|
677
|
+
if (height !== undefined && (!Number.isFinite(height) || height < 100 || height > 5000)) {
|
|
678
|
+
throw new WebPeelError('Height must be between 100 and 5000');
|
|
679
|
+
}
|
|
680
|
+
if (format !== 'png' && format !== 'jpeg') {
|
|
681
|
+
throw new WebPeelError('Format must be png or jpeg');
|
|
682
|
+
}
|
|
683
|
+
if (format === 'jpeg' && quality !== undefined) {
|
|
684
|
+
if (!Number.isFinite(quality) || quality < 1 || quality > 100) {
|
|
685
|
+
throw new WebPeelError('JPEG quality must be between 1 and 100');
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
// SECURITY: Validate custom headers if provided
|
|
689
|
+
if (headers) {
|
|
690
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
691
|
+
if (key.toLowerCase() === 'host') {
|
|
692
|
+
throw new WebPeelError('Custom Host header is not allowed');
|
|
693
|
+
}
|
|
694
|
+
if (typeof value !== 'string' || value.length > 500) {
|
|
695
|
+
throw new WebPeelError('Invalid header value');
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
}
|
|
699
|
+
// SECURITY: Limit concurrent browser pages with timeout
|
|
700
|
+
const queueStartTime = Date.now();
|
|
701
|
+
const QUEUE_TIMEOUT_MS = 30000;
|
|
702
|
+
while (activePagesCount >= MAX_CONCURRENT_PAGES) {
|
|
703
|
+
if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
|
|
704
|
+
throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
|
|
705
|
+
}
|
|
706
|
+
await new Promise(resolve => setTimeout(resolve, 100));
|
|
707
|
+
}
|
|
708
|
+
activePagesCount++;
|
|
709
|
+
let page = null;
|
|
710
|
+
let usingPooledPage = false;
|
|
711
|
+
try {
|
|
712
|
+
const browser = stealth ? await getStealthBrowser() : await getBrowser();
|
|
713
|
+
const shouldUsePagePool = !stealth && !userAgent;
|
|
714
|
+
if (shouldUsePagePool) {
|
|
715
|
+
page = takePooledPage();
|
|
716
|
+
usingPooledPage = !!page;
|
|
717
|
+
if (usingPooledPage && getPooledPagesCount() < PAGE_POOL_SIZE) {
|
|
718
|
+
void ensurePagePool(browser).catch(() => { });
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
if (!page) {
|
|
722
|
+
page = await browser.newPage({
|
|
723
|
+
userAgent: validatedUserAgent,
|
|
724
|
+
viewport: width || height ? {
|
|
725
|
+
width: width || 1280,
|
|
726
|
+
height: height || 720,
|
|
727
|
+
} : null, // Use browser window size when no explicit dimensions requested
|
|
728
|
+
});
|
|
729
|
+
await applyStealthScripts(page);
|
|
730
|
+
usingPooledPage = false;
|
|
731
|
+
}
|
|
732
|
+
else {
|
|
733
|
+
await page.setViewportSize({
|
|
734
|
+
width: width || 1280,
|
|
735
|
+
height: height || 720,
|
|
736
|
+
}).catch(() => { });
|
|
737
|
+
}
|
|
738
|
+
await page.unroute('**/*').catch(() => { });
|
|
739
|
+
const mergedHeaders = { ...(headers || {}) };
|
|
740
|
+
if (usingPooledPage) {
|
|
741
|
+
mergedHeaders['User-Agent'] = validatedUserAgent;
|
|
742
|
+
}
|
|
743
|
+
if (usingPooledPage || Object.keys(mergedHeaders).length > 0) {
|
|
744
|
+
await page.setExtraHTTPHeaders(mergedHeaders);
|
|
745
|
+
}
|
|
746
|
+
if (cookies && cookies.length > 0) {
|
|
747
|
+
const parsedCookies = cookies.map(cookie => {
|
|
748
|
+
const [nameValue] = cookie.split(';').map(s => s.trim());
|
|
749
|
+
const [name, value] = nameValue.split('=');
|
|
750
|
+
if (!name || value === undefined) {
|
|
751
|
+
throw new WebPeelError(`Invalid cookie format: ${cookie}`);
|
|
752
|
+
}
|
|
753
|
+
return {
|
|
754
|
+
name: name.trim(),
|
|
755
|
+
value: value.trim(),
|
|
756
|
+
url,
|
|
757
|
+
};
|
|
758
|
+
});
|
|
759
|
+
await page.context().addCookies(parsedCookies);
|
|
760
|
+
}
|
|
761
|
+
// For screenshots, allow all resources
|
|
762
|
+
await page.route('**/*', (route) => route.continue());
|
|
763
|
+
let screenshotBuffer;
|
|
764
|
+
const doWork = (async () => {
|
|
765
|
+
try {
|
|
766
|
+
await page.goto(url, {
|
|
767
|
+
waitUntil: 'domcontentloaded',
|
|
768
|
+
timeout: timeoutMs,
|
|
769
|
+
});
|
|
770
|
+
}
|
|
771
|
+
catch (gotoError) {
|
|
772
|
+
const msg = gotoError?.message || String(gotoError);
|
|
773
|
+
if (/net::ERR_HTTP2_PROTOCOL_ERROR/i.test(msg)) {
|
|
774
|
+
throw new BlockedError(`Site blocked the request (HTTP/2 protocol error). The site likely has anti-bot protection. Try using stealth mode or a proxy.`);
|
|
775
|
+
}
|
|
776
|
+
if (/net::ERR_CONNECTION_REFUSED/i.test(msg)) {
|
|
777
|
+
throw new NetworkError(`Connection refused by the server at ${url}. The server may be down or blocking your IP.`);
|
|
778
|
+
}
|
|
779
|
+
if (/net::ERR_CONNECTION_RESET/i.test(msg)) {
|
|
780
|
+
throw new BlockedError(`Connection was reset by the server. This typically indicates anti-bot protection or IP blocking. Try using stealth mode or a different IP.`);
|
|
781
|
+
}
|
|
782
|
+
if (/net::ERR_SSL/i.test(msg)) {
|
|
783
|
+
throw new NetworkError(`SSL/TLS error connecting to site. URL: ${url}`);
|
|
784
|
+
}
|
|
785
|
+
if (/net::ERR_NAME_NOT_RESOLVED/i.test(msg)) {
|
|
786
|
+
throw new NetworkError(`Domain not found: ${url}`);
|
|
787
|
+
}
|
|
788
|
+
if (/net::ERR_CERT/i.test(msg)) {
|
|
789
|
+
throw new NetworkError(`SSL certificate error for ${url}`);
|
|
790
|
+
}
|
|
791
|
+
if (/NS_ERROR_NET_RESET/i.test(msg)) {
|
|
792
|
+
throw new NetworkError(`Connection reset (Firefox). The site may be blocking automated access. URL: ${url}`);
|
|
793
|
+
}
|
|
794
|
+
if (/timeout/i.test(msg)) {
|
|
795
|
+
throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
|
|
796
|
+
}
|
|
797
|
+
if (/net::ERR_/i.test(msg)) {
|
|
798
|
+
throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
|
|
799
|
+
}
|
|
800
|
+
throw gotoError;
|
|
801
|
+
}
|
|
802
|
+
if (waitMs > 0) {
|
|
803
|
+
await page.waitForTimeout(waitMs);
|
|
804
|
+
}
|
|
805
|
+
// Element-level screenshot (clip to a specific CSS selector)
|
|
806
|
+
if (selector) {
|
|
807
|
+
const count = await page.locator(selector).count();
|
|
808
|
+
if (count === 0)
|
|
809
|
+
throw new WebPeelError(`Element not found: ${selector}`);
|
|
810
|
+
const element = await page.locator(selector).first();
|
|
811
|
+
const buf = await element.screenshot({
|
|
812
|
+
type: format,
|
|
813
|
+
...(format === 'jpeg' && typeof quality === 'number' ? { quality } : {}),
|
|
814
|
+
});
|
|
815
|
+
return { finalUrl: page.url(), screenshotBuffer: buf };
|
|
816
|
+
}
|
|
817
|
+
// Scroll through the page to trigger IntersectionObservers, lazy loading, animations
|
|
818
|
+
if (scrollThrough) {
|
|
819
|
+
await resilientScrollThrough(page, 250);
|
|
820
|
+
}
|
|
821
|
+
if (actions && actions.length > 0) {
|
|
822
|
+
const { executeActions } = await import('./actions.js');
|
|
823
|
+
const actionScreenshot = await executeActions(page, actions, {
|
|
824
|
+
fullPage,
|
|
825
|
+
type: format,
|
|
826
|
+
quality,
|
|
827
|
+
});
|
|
828
|
+
if (actionScreenshot) {
|
|
829
|
+
screenshotBuffer = actionScreenshot;
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
const finalUrl = page.url();
|
|
833
|
+
// Capture screenshot if not captured via actions
|
|
834
|
+
if (!screenshotBuffer) {
|
|
835
|
+
screenshotBuffer = await page.screenshot({
|
|
836
|
+
fullPage,
|
|
837
|
+
type: format,
|
|
838
|
+
...(format === 'jpeg' && typeof quality === 'number' ? { quality } : {}),
|
|
839
|
+
});
|
|
840
|
+
}
|
|
841
|
+
return { finalUrl, screenshotBuffer: screenshotBuffer };
|
|
842
|
+
})();
|
|
843
|
+
let operationTimeout;
|
|
844
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
845
|
+
operationTimeout = setTimeout(() => reject(new TimeoutError(`Operation timed out after ${timeoutMs}ms`)), timeoutMs);
|
|
846
|
+
});
|
|
847
|
+
const { finalUrl, screenshotBuffer: buf } = await Promise.race([doWork, timeoutPromise]);
|
|
848
|
+
if (operationTimeout) {
|
|
849
|
+
clearTimeout(operationTimeout);
|
|
850
|
+
}
|
|
851
|
+
return { buffer: buf, finalUrl };
|
|
852
|
+
}
|
|
853
|
+
catch (error) {
|
|
854
|
+
if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError) {
|
|
855
|
+
throw error;
|
|
856
|
+
}
|
|
857
|
+
if (error instanceof Error && error.message.includes('Timeout')) {
|
|
858
|
+
throw new TimeoutError('Browser screenshot timed out');
|
|
859
|
+
}
|
|
860
|
+
throw new NetworkError(`Browser screenshot failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
861
|
+
}
|
|
862
|
+
finally {
|
|
863
|
+
if (page) {
|
|
864
|
+
if (usingPooledPage) {
|
|
865
|
+
await recyclePooledPage(page);
|
|
866
|
+
}
|
|
867
|
+
else {
|
|
868
|
+
await page.close().catch(() => { });
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
activePagesCount--;
|
|
872
|
+
}
|
|
873
|
+
}
|
|
874
|
+
// ── browserDiff ───────────────────────────────────────────────────────────────
|
|
875
|
+
/**
 * Capture screenshots of two URLs and compute a pixel-level visual diff.
 *
 * Both URLs are captured concurrently as PNG through `browserScreenshot`,
 * decoded with `pngjs`, padded to a common canvas (the larger width and the
 * larger height of the two images) and compared with `pixelmatch`.
 *
 * @param {string} url1 - First URL to capture.
 * @param {string} url2 - Second URL to capture.
 * @param {object} [options]
 * @param {number} [options.width=1280] - Viewport width forwarded to both captures.
 * @param {number} [options.height=720] - Viewport height forwarded to both captures.
 * @param {boolean} [options.fullPage=false] - Forwarded to both captures.
 * @param {number} [options.threshold=0.1] - Forwarded to `pixelmatch`.
 * @param {boolean} [options.stealth=false] - Forwarded to both captures.
 * @param {number} [options.waitMs=0] - Forwarded to both captures.
 * @param {number} [options.timeoutMs=30000] - Forwarded to both captures.
 * @returns {Promise<{diffBuffer: Buffer, diffPixels: number, totalPixels: number, diffPercent: number, dimensions: {width: number, height: number}}>}
 *   The encoded diff PNG, the number of differing pixels reported by
 *   pixelmatch, the canvas pixel count, the differing share in percent, and
 *   the canvas dimensions.
 */
export async function browserDiff(url1, url2, options = {}) {
    const { width = 1280, height = 720, fullPage = false, threshold = 0.1, stealth = false, waitMs = 0, timeoutMs = 30000, } = options;
    // Take both screenshots as PNG (required for pixelmatch)
    const captureOptions = { width, height, fullPage, format: 'png', stealth, waitMs, timeoutMs };
    const [res1, res2] = await Promise.all([
        browserScreenshot(url1, { ...captureOptions }),
        browserScreenshot(url2, { ...captureOptions }),
    ]);
    // Dynamically import pngjs and pixelmatch (ESM-compatible)
    const { PNG } = await import('pngjs');
    const pixelmatch = (await import('pixelmatch')).default;
    const img1 = PNG.sync.read(res1.buffer);
    const img2 = PNG.sync.read(res2.buffer);
    // Use the larger of the two dimensions
    const outWidth = Math.max(img1.width, img2.width);
    const outHeight = Math.max(img1.height, img2.height);
    // Pad an image to the target canvas. Pixels outside the source image stay
    // zero-filled; an image that already matches is returned as-is (no copy).
    const padImage = (img, targetW, targetH) => {
        if (img.width === targetW && img.height === targetH) {
            return img.data;
        }
        const padded = Buffer.alloc(targetW * targetH * 4, 0);
        const rowBytes = Math.min(img.width, targetW) * 4;
        const rowCount = Math.min(img.height, targetH);
        // Copy one whole RGBA row at a time instead of channel by channel.
        for (let y = 0; y < rowCount; y++) {
            const srcStart = y * img.width * 4;
            padded.set(img.data.subarray(srcStart, srcStart + rowBytes), y * targetW * 4);
        }
        return padded;
    };
    const data1 = padImage(img1, outWidth, outHeight);
    const data2 = padImage(img2, outWidth, outHeight);
    const diffData = Buffer.alloc(outWidth * outHeight * 4);
    const diffPixels = pixelmatch(data1, data2, diffData, outWidth, outHeight, { threshold });
    const totalPixels = outWidth * outHeight;
    const diffPercent = totalPixels > 0 ? (diffPixels / totalPixels) * 100 : 0;
    const diffPng = new PNG({ width: outWidth, height: outHeight });
    diffPng.data = diffData;
    const diffBuffer = PNG.sync.write(diffPng);
    return {
        diffBuffer,
        diffPixels,
        totalPixels,
        diffPercent,
        dimensions: { width: outWidth, height: outHeight },
    };
}
|
|
928
|
+
// ── retryFetch ────────────────────────────────────────────────────────────────
|
|
929
|
+
/**
 * Retry an async operation with exponential backoff.
 *
 * `fn` is invoked up to `maxAttempts` times. After a failed attempt that is
 * not the last one, the function sleeps `baseDelayMs * 2^(attempt - 1)` ms
 * before trying again. `BlockedError` and `TimeoutError` are rethrown
 * immediately without further attempts.
 *
 * @param {() => Promise<*>} fn - Operation to run.
 * @param {number} [maxAttempts=3] - Maximum number of invocations of `fn`.
 * @param {number} [baseDelayMs=1000] - Delay before the second attempt.
 * @returns {Promise<*>} Whatever `fn` resolves to on its first success.
 * @throws The last failure once attempts are exhausted (non-Error values are
 *   wrapped in an Error that keeps the original value in its message and
 *   `cause`), or a `NetworkError` when `fn` was never invoked.
 */
export async function retryFetch(fn, maxAttempts = 3, baseDelayMs = 1000) {
    let lastError = null;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            return await fn();
        }
        catch (error) {
            // Don't retry on blocked errors or timeouts
            if (error instanceof BlockedError || error instanceof TimeoutError) {
                throw error;
            }
            // Keep the thrown value visible even when it is not an Error
            // (e.g. a rejected string), rather than discarding it.
            lastError = error instanceof Error
                ? error
                : new Error(`Unknown error: ${String(error)}`, { cause: error });
            if (attempt < maxAttempts) {
                const delay = baseDelayMs * Math.pow(2, attempt - 1);
                await new Promise((resolve) => setTimeout(resolve, delay));
            }
        }
    }
    throw lastError || new NetworkError('Retry failed');
}
|
|
952
|
+
// ── scrollAndWait ─────────────────────────────────────────────────────────────
|
|
953
|
+
/**
 * Repeatedly jump to the bottom of the page, pausing after each jump so
 * lazily loaded content (infinite scroll, deferred images, ...) can arrive.
 *
 * After every scroll the function waits for the `networkidle` load state with
 * a 2 s cap; if that wait rejects, the failure is logged at debug level and a
 * flat 1 s pause is used instead.
 *
 * @param page - Playwright Page instance.
 * @param times - Number of scroll-and-wait cycles (default: 3).
 * @returns The page HTML (`page.content()`) after all cycles complete.
 */
export async function scrollAndWait(page, times = 3) {
    // One settle step: prefer network idle, fall back to a fixed pause.
    const settle = async () => {
        try {
            await page.waitForLoadState('networkidle', { timeout: 2000 });
        }
        catch (err) {
            // networkidle may never fire — fall back to a flat delay.
            const detail = err instanceof Error ? err.message : err;
            log.debug('networkidle timeout, falling back to flat delay:', detail);
            await page.waitForTimeout(1000);
        }
    };
    let cycle = 0;
    while (cycle < times) {
        // eslint-disable-next-line @typescript-eslint/no-implied-eval
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
        await settle();
        cycle += 1;
    }
    return page.content();
}
|
|
978
|
+
// ── browserFilmstrip ──────────────────────────────────────────────────────────
|
|
979
|
+
/**
 * Capture multiple viewport screenshots at evenly distributed scroll positions.
 *
 * Waits for a free page slot (polling every 100 ms, giving up after 30 s),
 * opens a fresh page, navigates to `url`, then scrolls from the top to the
 * last scrollable offset in `frames` equal steps, taking one screenshot per
 * step. The page is always closed and the active-page counter decremented.
 *
 * @param {string} url - Page to capture; checked with `validateUrl`.
 * @param {object} [options]
 * @param {number} [options.frames=6] - Requested frame count; truncated to a
 *   whole number and clamped to 2..12 (non-numeric values fall back to 6).
 * @param {number} [options.width] - Viewport width (1280 when falsy).
 * @param {number} [options.height] - Viewport height (720 when falsy).
 * @param {string} [options.format='png'] - Screenshot type.
 * @param {number} [options.quality] - Only applied when `format` is 'jpeg'.
 * @param {number} [options.waitMs=0] - Extra wait after navigation.
 * @param {number} [options.timeoutMs=30000] - Navigation timeout.
 * @param {string} [options.userAgent] - Validated when given, random otherwise.
 * @param {object} [options.headers] - Extra HTTP headers for the page.
 * @param {string[]} [options.cookies] - `name=value[; attrs]` strings; only the
 *   part before the first `;` is used and the cookie is scoped to `url`.
 * @param {boolean} [options.stealth=false] - Use the stealth browser.
 * @returns {Promise<{frames: Buffer[], finalUrl: string}>} One buffer per frame
 *   plus the page URL after navigation.
 * @throws {TimeoutError} On queue timeout, navigation timeout, or any other
 *   error whose message contains 'Timeout'.
 * @throws {WebPeelError} When a cookie string has no name or no `=`.
 * @throws {NetworkError} For `net::ERR_*` navigation failures and any other
 *   otherwise-unclassified failure.
 */
export async function browserFilmstrip(url, options = {}) {
    validateUrl(url);
    const { frames: frameCount = 6, width, height, format = 'png', quality, waitMs = 0, timeoutMs = 30000, userAgent, headers, cookies, stealth = false, } = options;
    // Clamp frames between 2 and 12, then drop any fractional part so the
    // number of captures and the step divisor below always agree.
    const clampedFrames = Math.max(2, Math.min(12, Number(frameCount)));
    const numFrames = Number.isNaN(clampedFrames) ? 6 : Math.floor(clampedFrames);
    const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
    const queueStartTime = Date.now();
    const QUEUE_TIMEOUT_MS = 30000;
    while (activePagesCount >= MAX_CONCURRENT_PAGES) {
        if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
            throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
        }
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    activePagesCount++;
    let page = null;
    try {
        const browser = stealth ? await getStealthBrowser() : await getBrowser();
        page = await browser.newPage({
            userAgent: validatedUserAgent,
            viewport: { width: width || 1280, height: height || 720 },
        });
        await applyStealthScripts(page);
        if (headers) {
            await page.setExtraHTTPHeaders(headers);
        }
        if (cookies && cookies.length > 0) {
            const parsedCookies = cookies.map((cookie) => {
                // Only the leading name=value pair is used; attributes after ';' are ignored.
                const [pair] = cookie.split(';');
                // Split on the FIRST '=' only: values may legitimately contain '='
                // (e.g. base64 padding) and must not be truncated.
                const eq = pair.indexOf('=');
                const name = eq === -1 ? '' : pair.slice(0, eq).trim();
                if (!name) {
                    throw new WebPeelError(`Invalid cookie format: ${cookie}`);
                }
                return { name, value: pair.slice(eq + 1).trim(), url };
            });
            await page.context().addCookies(parsedCookies);
        }
        await page.route('**/*', (route) => route.continue());
        try {
            await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
        }
        catch (gotoError) {
            const msg = gotoError?.message || String(gotoError);
            if (/timeout/i.test(msg)) {
                throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
            }
            if (/net::ERR_/i.test(msg)) {
                throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
            }
            throw gotoError;
        }
        if (waitMs > 0) {
            await page.waitForTimeout(waitMs);
        }
        // Wait a bit for initial animations
        await page.waitForTimeout(800);
        const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
        const viewportHeight = await page.evaluate(() => window.innerHeight);
        // A page shorter than the viewport has nothing to scroll; never go negative.
        const maxScroll = Math.max(0, scrollHeight - viewportHeight);
        // Calculate scroll positions (evenly distributed from 0 to maxScroll)
        const positions = [];
        for (let i = 0; i < numFrames; i++) {
            positions.push(Math.round(maxScroll * i / (numFrames - 1)));
        }
        const capturedFrames = [];
        for (const pos of positions) {
            await page.evaluate((y) => window.scrollTo({ top: y, behavior: 'instant' }), pos);
            await page.waitForTimeout(350); // Let animations settle
            const buf = await page.screenshot({
                type: format,
                ...(format === 'jpeg' && typeof quality === 'number' ? { quality } : {}),
            });
            capturedFrames.push(buf);
        }
        const finalUrl = page.url();
        return { frames: capturedFrames, finalUrl };
    }
    catch (error) {
        if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError) {
            throw error;
        }
        if (error instanceof Error && error.message.includes('Timeout')) {
            throw new TimeoutError('Browser filmstrip timed out');
        }
        throw new NetworkError(`Browser filmstrip failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
    finally {
        if (page) {
            await page.close().catch(() => { });
        }
        activePagesCount--;
    }
}
|
|
1072
|
+
// ── withBrowserPage ───────────────────────────────────────────────────────────
|
|
1073
|
+
/**
 * Shared boilerplate for the 4 new screenshot functions:
 *  - Queue concurrency wait (polls every 100 ms, gives up after 30 s)
 *  - Launch browser (stealth or normal)
 *  - Open a new page with viewport + userAgent
 *  - Apply stealth scripts
 *  - Set custom headers and cookies
 *  - Navigate to the URL (with error normalisation)
 *  - Wait optional extra time
 *  - Call `fn(page)` for the unique per-function logic
 *  - Always close the page and decrement the counter
 *
 * NOTE: Do NOT touch browserFetch / browserScreenshot / browserFilmstrip —
 * they have slightly different pooling / keep-open logic.
 *
 * @param {string} url - Page to open; checked with `validateUrl`.
 * @param {object} opts
 * @param {number} [opts.width=1440] - Viewport width.
 * @param {number} [opts.height=900] - Viewport height.
 * @param {string} [opts.userAgent] - Validated when given, random otherwise.
 * @param {object} [opts.headers] - Extra HTTP headers for the page.
 * @param {string[]} [opts.cookies] - `name=value[; attrs]` strings; only the
 *   part before the first `;` is used and the cookie is scoped to `url`.
 * @param {boolean} [opts.stealth=false] - Use the stealth browser.
 * @param {number} [opts.waitMs=0] - Extra wait after navigation.
 * @param {number} [opts.timeoutMs=60000] - Navigation timeout.
 * @param {(page: object) => Promise<*>} fn - Per-caller work run on the loaded page.
 * @returns {Promise<{result: *, finalUrl: string}>} `fn`'s result and the page
 *   URL read after `fn` finished.
 * @throws {TimeoutError} On queue timeout, navigation timeout, or any other
 *   error whose message contains 'Timeout'.
 * @throws {WebPeelError} When a cookie string has no name or no `=`.
 * @throws {NetworkError} For `net::ERR_*` navigation failures and any other
 *   otherwise-unclassified failure.
 */
async function withBrowserPage(url, opts, fn) {
    validateUrl(url);
    const { width = 1440, height = 900, userAgent, headers, cookies, stealth = false, waitMs = 0, timeoutMs = 60000, } = opts;
    const validatedUserAgent = userAgent ? validateUserAgent(userAgent) : getRandomUserAgent();
    const queueStartTime = Date.now();
    const QUEUE_TIMEOUT_MS = 30000;
    while (activePagesCount >= MAX_CONCURRENT_PAGES) {
        if (Date.now() - queueStartTime > QUEUE_TIMEOUT_MS) {
            throw new TimeoutError('Browser page queue timeout - too many concurrent requests');
        }
        await new Promise(resolve => setTimeout(resolve, 100));
    }
    activePagesCount++;
    let page = null;
    try {
        const browser = stealth ? await getStealthBrowser() : await getBrowser();
        page = await browser.newPage({
            userAgent: validatedUserAgent,
            viewport: { width, height },
        });
        await applyStealthScripts(page);
        if (headers) {
            await page.setExtraHTTPHeaders(headers);
        }
        if (cookies && cookies.length > 0) {
            const parsedCookies = cookies.map((cookie) => {
                // Only the leading name=value pair is used; attributes after ';' are ignored.
                const [pair] = cookie.split(';');
                // Split on the FIRST '=' only: values may legitimately contain '='
                // (e.g. base64 padding) and must not be truncated.
                const eq = pair.indexOf('=');
                const name = eq === -1 ? '' : pair.slice(0, eq).trim();
                if (!name) {
                    throw new WebPeelError(`Invalid cookie format: ${cookie}`);
                }
                return { name, value: pair.slice(eq + 1).trim(), url };
            });
            await page.context().addCookies(parsedCookies);
        }
        try {
            await page.goto(url, { waitUntil: 'domcontentloaded', timeout: timeoutMs });
        }
        catch (gotoError) {
            const msg = gotoError?.message || String(gotoError);
            if (/timeout/i.test(msg)) {
                throw new TimeoutError(`Page load timed out after ${timeoutMs}ms: ${url}`);
            }
            if (/net::ERR_/i.test(msg)) {
                throw new NetworkError(`Browser network error: ${msg.match(/net::ERR_\w+/i)?.[0] || msg}`);
            }
            throw gotoError;
        }
        if (waitMs > 0) {
            await page.waitForTimeout(waitMs);
        }
        const result = await fn(page);
        const finalUrl = page.url();
        return { result, finalUrl };
    }
    catch (error) {
        if (error instanceof BlockedError || error instanceof WebPeelError || error instanceof TimeoutError) {
            throw error;
        }
        if (error instanceof Error && error.message.includes('Timeout')) {
            throw new TimeoutError('Browser operation timed out');
        }
        throw new NetworkError(`Browser operation failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
    finally {
        if (page) {
            await page.close().catch(() => { });
        }
        activePagesCount--;
    }
}
|
|
1151
|
+
// ── browserAudit ──────────────────────────────────────────────────────────────
|
|
1152
|
+
/**
 * Section-aware audit screenshots.
 *
 * Collects every element matching `selector`, records its tag, id, class name,
 * document-relative top offset and height, then scrolls the viewport to each
 * element's top and takes a viewport screenshot there.
 *
 * @param {string} url - Page to audit (opened through `withBrowserPage`).
 * @param {object} [options] - `selector` defaults to 'section', `format` to
 *   'jpeg', `quality` to 80, viewport to 1440x900, `timeoutMs` to 60000;
 *   `scrollThrough` runs `resilientScrollThrough` first to trigger lazy content.
 * @returns {Promise<{frames: object[], finalUrl: string}>} One entry per
 *   matching element (`index`, element metadata, `buffer`) plus the final URL.
 */
export async function browserAudit(url, options = {}) {
    const { width = 1440, height = 900, format = 'jpeg', quality = 80, selector = 'section', waitMs = 0, timeoutMs = 60000, userAgent, headers, cookies, stealth = false, scrollThrough = false, } = options;
    const pageOptions = { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs };
    // Quality is only meaningful for JPEG output.
    const screenshotOptions = () => (format === 'jpeg' && typeof quality === 'number'
        ? { type: format, quality }
        : { type: format });
    const auditPage = async (page) => {
        // Scroll through to trigger lazy content
        if (scrollThrough) {
            await resilientScrollThrough(page, 200);
        }
        const targets = await page.evaluate((sel) => {
            const described = [];
            for (const node of Array.from(document.querySelectorAll(sel))) {
                const box = node.getBoundingClientRect();
                const offsetY = window.scrollY || document.documentElement.scrollTop;
                described.push({
                    tag: node.tagName.toLowerCase(),
                    id: node.id || '',
                    className: node.className || '',
                    top: box.top + offsetY,
                    height: box.height,
                });
            }
            return described;
        }, selector);
        const shots = [];
        for (const [index, target] of targets.entries()) {
            await page.evaluate((y) => window.scrollTo({ top: y, behavior: 'instant' }), target.top);
            await page.waitForTimeout(200);
            const buffer = await page.screenshot(screenshotOptions());
            shots.push({ index, ...target, buffer });
        }
        return shots;
    };
    const { result: frames, finalUrl } = await withBrowserPage(url, pageOptions, auditPage);
    return { frames, finalUrl };
}
|
|
1193
|
+
// ── browserAnimationCapture ───────────────────────────────────────────────────
|
|
1194
|
+
/**
 * Capture N viewport screenshots at fixed intervals to record CSS animation states.
 *
 * The viewport is positioned first: when `selector` is given the first match is
 * scrolled into view, otherwise a numeric `scrollTo` offset is applied; either
 * step is followed by a 300 ms pause. Frames are then taken back to back with
 * `intervalMs` between them (no pause after the last one).
 *
 * @param {string} url - Page to record (opened through `withBrowserPage`).
 * @param {object} [options] - `frames` defaults to 6 and is clamped to 1..30,
 *   `intervalMs` to 500, `format` to 'jpeg', `quality` to 80, viewport to
 *   1440x900, `timeoutMs` to 60000.
 * @returns {Promise<{frames: Array<{index: number, timestampMs: number, buffer: *}>, finalUrl: string}>}
 *   `timestampMs` is the wall-clock time elapsed since capture started,
 *   measured after each screenshot completes.
 */
export async function browserAnimationCapture(url, options = {}) {
    const { frames: frameCount = 6, intervalMs = 500, scrollTo, selector, width = 1440, height = 900, format = 'jpeg', quality = 80, waitMs = 0, timeoutMs = 60000, userAgent, headers, cookies, stealth = false, } = options;
    const totalFrames = Math.max(1, Math.min(30, frameCount));
    const pageOptions = { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs };
    // Quality is only meaningful for JPEG output.
    const screenshotOptions = () => (format === 'jpeg' && typeof quality === 'number'
        ? { type: format, quality }
        : { type: format });
    const recordFrames = async (page) => {
        // Position the viewport
        if (selector) {
            await page.evaluate((sel) => {
                const target = document.querySelector(sel);
                if (target) {
                    target.scrollIntoView({ behavior: 'instant', block: 'start' });
                }
            }, selector);
            await page.waitForTimeout(300);
        }
        else if (typeof scrollTo === 'number') {
            await page.evaluate((y) => window.scrollTo({ top: y, behavior: 'instant' }), scrollTo);
            await page.waitForTimeout(300);
        }
        const recorded = [];
        const captureStart = Date.now();
        let index = 0;
        while (index < totalFrames) {
            const buffer = await page.screenshot(screenshotOptions());
            recorded.push({ index, timestampMs: Date.now() - captureStart, buffer });
            index += 1;
            // Pause only between frames, never after the final one.
            if (index < totalFrames) {
                await page.waitForTimeout(intervalMs);
            }
        }
        return recorded;
    };
    const { result: frames, finalUrl } = await withBrowserPage(url, pageOptions, recordFrames);
    return { frames, finalUrl };
}
|
|
1230
|
+
// ── browserViewports ──────────────────────────────────────────────────────────
|
|
1231
|
+
/**
 * Capture screenshots at multiple viewport widths in a single browser session.
 * Resizes the viewport between each capture.
 *
 * The page is opened once (through `withBrowserPage`) using the first
 * viewport's dimensions; for every requested viewport the page is resized,
 * given 500 ms to reflow, optionally scrolled through, and screenshotted.
 *
 * @param {string} url - Page to capture.
 * @param {object} [options]
 * @param {Array<{width: number, height: number, label?: string}>} options.viewports
 *   Viewports to capture, in order. At least one is required.
 * @param {boolean} [options.fullPage=false] - Forwarded to `page.screenshot`.
 * @param {string} [options.format='jpeg'] - Screenshot type.
 * @param {number} [options.quality=80] - Only applied when `format` is 'jpeg'.
 * @param {number} [options.waitMs=0]
 * @param {number} [options.timeoutMs=90000]
 * @param {string} [options.userAgent]
 * @param {object} [options.headers]
 * @param {string[]} [options.cookies]
 * @param {boolean} [options.stealth=false]
 * @param {boolean} [options.scrollThrough=false] - Run `resilientScrollThrough`
 *   after each resize.
 * @returns {Promise<{frames: Array<{width: number, height: number, label: string, buffer: *}>, finalUrl: string}>}
 *   `label` falls back to `"<width>x<height>"` when the viewport has none.
 * @throws {WebPeelError} When no viewport is supplied (including when
 *   `options` is omitted entirely).
 */
export async function browserViewports(url, options = {}) {
    // `options = {}` lets a call without options reach the explicit check
    // below instead of failing on destructuring with a raw TypeError.
    const { viewports, fullPage = false, format = 'jpeg', quality = 80, waitMs = 0, timeoutMs = 90000, userAgent, headers, cookies, stealth = false, scrollThrough = false, } = options;
    if (!viewports || viewports.length === 0) {
        throw new WebPeelError('At least one viewport is required');
    }
    // Use first viewport dimensions for initial page setup
    const firstVp = viewports[0];
    const { result: frames, finalUrl } = await withBrowserPage(url, { width: firstVp.width, height: firstVp.height, userAgent, headers, cookies, stealth, waitMs, timeoutMs }, async (page) => {
        const capturedFrames = [];
        for (const vp of viewports) {
            const label = vp.label || `${vp.width}x${vp.height}`;
            // Resize viewport
            await page.setViewportSize({ width: vp.width, height: vp.height });
            await page.waitForTimeout(500); // Wait for reflow
            if (scrollThrough) {
                await resilientScrollThrough(page, 150);
            }
            const buf = await page.screenshot({
                fullPage,
                type: format,
                ...(format === 'jpeg' && typeof quality === 'number' ? { quality } : {}),
            });
            capturedFrames.push({ width: vp.width, height: vp.height, label, buffer: buf });
        }
        return capturedFrames;
    });
    return { frames, finalUrl };
}
|
|
1263
|
+
/**
|
|
1264
|
+
* Extract computed CSS values and validate against design rules.
|
|
1265
|
+
* Returns structured JSON instead of pixel images.
|
|
1266
|
+
*/
|
|
1267
|
+
export async function browserDesignAudit(url, options = {}) {
|
|
1268
|
+
const { rules = {}, selector = 'body', width = 1440, height = 900, waitMs = 0, timeoutMs = 60000, userAgent, headers, cookies, stealth = false, } = options;
|
|
1269
|
+
const spacingGrid = rules.spacingGrid ?? 2; // 2px base grid (accepts any even value; catches odd-pixel errors)
|
|
1270
|
+
const minTouchTarget = rules.minTouchTarget ?? 44;
|
|
1271
|
+
const minContrast = rules.minContrast ?? 4.5;
|
|
1272
|
+
const { result: auditData, finalUrl } = await withBrowserPage(url, { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs }, async (page) => {
|
|
1273
|
+
// Run design audit inside the browser
|
|
1274
|
+
return page.evaluate((params) => {
|
|
1275
|
+
const { sel, spacingGrid, minTouchTarget, minContrast } = params;
|
|
1276
|
+
// --- Helpers ---
|
|
1277
|
+
function parsePixels(val) {
|
|
1278
|
+
const n = parseFloat(val);
|
|
1279
|
+
return isNaN(n) ? 0 : n;
|
|
1280
|
+
}
|
|
1281
|
+
function parseRgb(color) {
|
|
1282
|
+
const m = color.match(/rgba?\(([0-9]+),\s*([0-9]+),\s*([0-9]+)/);
|
|
1283
|
+
if (!m)
|
|
1284
|
+
return null;
|
|
1285
|
+
return [parseInt(m[1]), parseInt(m[2]), parseInt(m[3])];
|
|
1286
|
+
}
|
|
1287
|
+
function parseRgba(color) {
|
|
1288
|
+
const m = color.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
|
|
1289
|
+
if (!m)
|
|
1290
|
+
return null;
|
|
1291
|
+
return [parseInt(m[1]), parseInt(m[2]), parseInt(m[3]), m[4] !== undefined ? parseFloat(m[4]) : 1];
|
|
1292
|
+
}
|
|
1293
|
+
function getEffectiveBackground(el) {
|
|
1294
|
+
let current = el;
|
|
1295
|
+
while (current && current !== document.documentElement) {
|
|
1296
|
+
const style = window.getComputedStyle(current);
|
|
1297
|
+
const bg = style.backgroundColor;
|
|
1298
|
+
const parsed = parseRgba(bg);
|
|
1299
|
+
if (parsed && parsed[3] > 0.5) {
|
|
1300
|
+
return [parsed[0], parsed[1], parsed[2]];
|
|
1301
|
+
}
|
|
1302
|
+
current = current.parentElement;
|
|
1303
|
+
}
|
|
1304
|
+
// Check html element
|
|
1305
|
+
const htmlStyle = window.getComputedStyle(document.documentElement);
|
|
1306
|
+
const htmlBg = parseRgba(htmlStyle.backgroundColor);
|
|
1307
|
+
if (htmlBg && htmlBg[3] > 0.5) {
|
|
1308
|
+
return [htmlBg[0], htmlBg[1], htmlBg[2]];
|
|
1309
|
+
}
|
|
1310
|
+
// Check body element
|
|
1311
|
+
const bodyStyle = window.getComputedStyle(document.body);
|
|
1312
|
+
const bodyBg = parseRgba(bodyStyle.backgroundColor);
|
|
1313
|
+
if (bodyBg && bodyBg[3] > 0.5) {
|
|
1314
|
+
return [bodyBg[0], bodyBg[1], bodyBg[2]];
|
|
1315
|
+
}
|
|
1316
|
+
// Check color-scheme CSS property or meta tag
|
|
1317
|
+
const colorScheme = htmlStyle.colorScheme ||
|
|
1318
|
+
document.querySelector('meta[name="color-scheme"]')?.getAttribute('content') || '';
|
|
1319
|
+
if (colorScheme.includes('dark')) {
|
|
1320
|
+
return [0, 0, 0]; // Dark scheme default
|
|
1321
|
+
}
|
|
1322
|
+
// Ultimate fallback: white (standard web default)
|
|
1323
|
+
return [255, 255, 255];
|
|
1324
|
+
}
|
|
1325
|
+
function hasBackdropFilter(el) {
|
|
1326
|
+
let current = el;
|
|
1327
|
+
while (current) {
|
|
1328
|
+
const style = window.getComputedStyle(current);
|
|
1329
|
+
const bf = style.backdropFilter;
|
|
1330
|
+
if (bf && bf !== 'none' && bf !== '')
|
|
1331
|
+
return true;
|
|
1332
|
+
current = current.parentElement;
|
|
1333
|
+
}
|
|
1334
|
+
return false;
|
|
1335
|
+
}
|
|
1336
|
+
function detectPageColorScheme() {
|
|
1337
|
+
const htmlStyle = window.getComputedStyle(document.documentElement);
|
|
1338
|
+
const htmlBg = parseRgba(htmlStyle.backgroundColor);
|
|
1339
|
+
if (htmlBg && htmlBg[3] > 0.5) {
|
|
1340
|
+
const lum = luminance(htmlBg[0], htmlBg[1], htmlBg[2]);
|
|
1341
|
+
return lum < 0.18 ? 'dark' : 'light';
|
|
1342
|
+
}
|
|
1343
|
+
const bodyStyle = window.getComputedStyle(document.body);
|
|
1344
|
+
const bodyBg = parseRgba(bodyStyle.backgroundColor);
|
|
1345
|
+
if (bodyBg && bodyBg[3] > 0.5) {
|
|
1346
|
+
const lum = luminance(bodyBg[0], bodyBg[1], bodyBg[2]);
|
|
1347
|
+
return lum < 0.18 ? 'dark' : 'light';
|
|
1348
|
+
}
|
|
1349
|
+
const colorScheme = htmlStyle.colorScheme ||
|
|
1350
|
+
document.querySelector('meta[name="color-scheme"]')?.getAttribute('content') || '';
|
|
1351
|
+
if (colorScheme.includes('dark'))
|
|
1352
|
+
return 'dark';
|
|
1353
|
+
if (colorScheme.includes('light'))
|
|
1354
|
+
return 'light';
|
|
1355
|
+
return 'unknown';
|
|
1356
|
+
}
|
|
1357
|
+
function luminance(r, g, b) {
|
|
1358
|
+
const [rs, gs, bs] = [r, g, b].map(c => {
|
|
1359
|
+
const s = c / 255;
|
|
1360
|
+
return s <= 0.03928 ? s / 12.92 : Math.pow((s + 0.055) / 1.055, 2.4);
|
|
1361
|
+
});
|
|
1362
|
+
return 0.2126 * rs + 0.7152 * gs + 0.0722 * bs;
|
|
1363
|
+
}
|
|
1364
|
+
function contrastRatio(c1, c2) {
|
|
1365
|
+
const l1 = luminance(...c1);
|
|
1366
|
+
const l2 = luminance(...c2);
|
|
1367
|
+
const lighter = Math.max(l1, l2);
|
|
1368
|
+
const darker = Math.min(l1, l2);
|
|
1369
|
+
return (lighter + 0.05) / (darker + 0.05);
|
|
1370
|
+
}
|
|
1371
|
+
function elementLabel(el) {
|
|
1372
|
+
const id = el.id ? `#${el.id}` : '';
|
|
1373
|
+
const cls = el.className && typeof el.className === 'string'
|
|
1374
|
+
? '.' + el.className.trim().split(/\s+/).slice(0, 2).join('.')
|
|
1375
|
+
: '';
|
|
1376
|
+
return `${el.tagName.toLowerCase()}${id}${cls}`;
|
|
1377
|
+
}
|
|
1378
|
+
function nearestMultiple(val, grid) {
|
|
1379
|
+
if (grid <= 0)
|
|
1380
|
+
return val;
|
|
1381
|
+
return Math.round(val / grid) * grid;
|
|
1382
|
+
}
|
|
1383
|
+
const root = document.querySelector(sel) || document.body;
|
|
1384
|
+
const allElements = Array.from(root.querySelectorAll('*'));
|
|
1385
|
+
const spacingViolations = [];
|
|
1386
|
+
const touchTargetViolations = [];
|
|
1387
|
+
const contrastViolations = [];
|
|
1388
|
+
const fontSizesSet = new Set();
|
|
1389
|
+
const lineHeightsSet = new Set();
|
|
1390
|
+
const letterSpacingsSet = new Set();
|
|
1391
|
+
const spacingValuesSet = new Set();
|
|
1392
|
+
const interactiveTags = new Set(['a', 'button', 'input', 'select', 'textarea', 'label']);
|
|
1393
|
+
for (const el of allElements) {
|
|
1394
|
+
const style = window.getComputedStyle(el);
|
|
1395
|
+
const rect = el.getBoundingClientRect();
|
|
1396
|
+
// Skip invisible elements
|
|
1397
|
+
if (rect.width === 0 && rect.height === 0)
|
|
1398
|
+
continue;
|
|
1399
|
+
const label = elementLabel(el);
|
|
1400
|
+
// Spacing
|
|
1401
|
+
const spacingProps = ['marginTop', 'marginRight', 'marginBottom', 'marginLeft',
|
|
1402
|
+
'paddingTop', 'paddingRight', 'paddingBottom', 'paddingLeft', 'gap', 'rowGap', 'columnGap'];
|
|
1403
|
+
for (const prop of spacingProps) {
|
|
1404
|
+
const raw = style[prop];
|
|
1405
|
+
if (!raw || raw === 'normal' || raw === 'auto')
|
|
1406
|
+
continue;
|
|
1407
|
+
const px = parsePixels(raw);
|
|
1408
|
+
if (px <= 0)
|
|
1409
|
+
continue;
|
|
1410
|
+
spacingValuesSet.add(px);
|
|
1411
|
+
if (spacingGrid > 0 && Math.round(px) % spacingGrid !== 0) {
|
|
1412
|
+
spacingViolations.push({
|
|
1413
|
+
element: label,
|
|
1414
|
+
property: prop,
|
|
1415
|
+
value: Math.round(px),
|
|
1416
|
+
nearestGridValue: nearestMultiple(px, spacingGrid),
|
|
1417
|
+
});
|
|
1418
|
+
}
|
|
1419
|
+
}
|
|
1420
|
+
// Typography
|
|
1421
|
+
const fs = style.fontSize;
|
|
1422
|
+
const lh = style.lineHeight;
|
|
1423
|
+
const ls = style.letterSpacing;
|
|
1424
|
+
if (fs)
|
|
1425
|
+
fontSizesSet.add(fs);
|
|
1426
|
+
if (lh && lh !== 'normal')
|
|
1427
|
+
lineHeightsSet.add(lh);
|
|
1428
|
+
if (ls && ls !== 'normal')
|
|
1429
|
+
letterSpacingsSet.add(ls);
|
|
1430
|
+
// Touch targets
|
|
1431
|
+
const tag = el.tagName.toLowerCase();
|
|
1432
|
+
if (interactiveTags.has(tag)) {
|
|
1433
|
+
const w = rect.width;
|
|
1434
|
+
const h = rect.height;
|
|
1435
|
+
if (w > 0 && h > 0 && (w < minTouchTarget || h < minTouchTarget)) {
|
|
1436
|
+
touchTargetViolations.push({ element: label, width: Math.round(w), height: Math.round(h), minRequired: minTouchTarget });
|
|
1437
|
+
}
|
|
1438
|
+
}
|
|
1439
|
+
// Contrast — Walk up DOM tree to find effective opaque background
|
|
1440
|
+
const textColor = style.color;
|
|
1441
|
+
if (textColor) {
|
|
1442
|
+
const fg = parseRgb(textColor);
|
|
1443
|
+
if (fg) {
|
|
1444
|
+
if (hasBackdropFilter(el)) {
|
|
1445
|
+
// Background can't be determined from CSS alone — mark as unresolvable
|
|
1446
|
+
// and exclude from scoring (bgResolved: false)
|
|
1447
|
+
const text = el.textContent?.trim() || '';
|
|
1448
|
+
if (text.length > 0 && text.length < 200) {
|
|
1449
|
+
contrastViolations.push({
|
|
1450
|
+
element: label,
|
|
1451
|
+
textColor,
|
|
1452
|
+
bgColor: 'unknown (backdrop-filter)',
|
|
1453
|
+
ratio: 0,
|
|
1454
|
+
required: minContrast,
|
|
1455
|
+
bgResolved: false,
|
|
1456
|
+
});
|
|
1457
|
+
}
|
|
1458
|
+
}
|
|
1459
|
+
else {
|
|
1460
|
+
const effectiveBg = getEffectiveBackground(el);
|
|
1461
|
+
// bgResolved: true — background was successfully determined via DOM traversal
|
|
1462
|
+
const ratio = contrastRatio(fg, effectiveBg);
|
|
1463
|
+
if (ratio > 1.05 && ratio < minContrast) {
|
|
1464
|
+
// Only flag elements with visible text content
|
|
1465
|
+
const text = el.textContent?.trim() || '';
|
|
1466
|
+
if (text.length > 0 && text.length < 200) {
|
|
1467
|
+
contrastViolations.push({
|
|
1468
|
+
element: label,
|
|
1469
|
+
textColor,
|
|
1470
|
+
bgColor: `rgb(${effectiveBg.join(',')})`,
|
|
1471
|
+
ratio: Math.round(ratio * 100) / 100,
|
|
1472
|
+
required: minContrast,
|
|
1473
|
+
bgResolved: true,
|
|
1474
|
+
});
|
|
1475
|
+
}
|
|
1476
|
+
}
|
|
1477
|
+
}
|
|
1478
|
+
}
|
|
1479
|
+
}
|
|
1480
|
+
}
|
|
1481
|
+
const spacingScale = Array.from(spacingValuesSet).sort((a, b) => a - b).map(v => Math.round(v));
|
|
1482
|
+
// ── WCAG Accessibility Audit ──────────────────────────────────────
|
|
1483
|
+
const a11yViolations = [];
|
|
1484
|
+
const headingStructure = [];
|
|
1485
|
+
// 1. Images without alt text
|
|
1486
|
+
const images = root.querySelectorAll('img');
|
|
1487
|
+
for (const img of Array.from(images)) {
|
|
1488
|
+
if (!img.getAttribute('alt') && !img.getAttribute('aria-label') && !img.getAttribute('role')?.includes('presentation')) {
|
|
1489
|
+
a11yViolations.push({ type: 'missing-alt', element: elementLabel(img), details: `src: ${(img.getAttribute('src') || '').slice(0, 80)}` });
|
|
1490
|
+
}
|
|
1491
|
+
}
|
|
1492
|
+
// 2. Form inputs without labels
|
|
1493
|
+
const inputs = root.querySelectorAll('input, select, textarea');
|
|
1494
|
+
for (const input of Array.from(inputs)) {
|
|
1495
|
+
const id = input.getAttribute('id');
|
|
1496
|
+
const hasLabel = id && document.querySelector(`label[for="${id}"]`);
|
|
1497
|
+
const hasAria = input.getAttribute('aria-label') || input.getAttribute('aria-labelledby');
|
|
1498
|
+
const hasTitle = input.getAttribute('title');
|
|
1499
|
+
if (!hasLabel && !hasAria && !hasTitle && input.getAttribute('type') !== 'hidden') {
|
|
1500
|
+
a11yViolations.push({ type: 'missing-label', element: elementLabel(input), details: `type: ${input.getAttribute('type') || 'text'}` });
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
// 3. Heading hierarchy
|
|
1504
|
+
const headings = root.querySelectorAll('h1, h2, h3, h4, h5, h6');
|
|
1505
|
+
let prevLevel = 0;
|
|
1506
|
+
for (const h of Array.from(headings)) {
|
|
1507
|
+
const level = parseInt(h.tagName[1]);
|
|
1508
|
+
headingStructure.push(h.tagName.toLowerCase());
|
|
1509
|
+
if (prevLevel > 0 && level > prevLevel + 1) {
|
|
1510
|
+
a11yViolations.push({ type: 'heading-skip', element: elementLabel(h), details: `Jumped from h${prevLevel} to h${level}` });
|
|
1511
|
+
}
|
|
1512
|
+
prevLevel = level;
|
|
1513
|
+
}
|
|
1514
|
+
// 4. Empty links
|
|
1515
|
+
const links = root.querySelectorAll('a');
|
|
1516
|
+
for (const link of Array.from(links)) {
|
|
1517
|
+
const text = (link.textContent || '').trim();
|
|
1518
|
+
const aria = link.getAttribute('aria-label');
|
|
1519
|
+
const title = link.getAttribute('title');
|
|
1520
|
+
const hasImg = link.querySelector('img[alt]');
|
|
1521
|
+
if (!text && !aria && !title && !hasImg) {
|
|
1522
|
+
a11yViolations.push({ type: 'empty-link', element: elementLabel(link), details: `href: ${(link.getAttribute('href') || '').slice(0, 60)}` });
|
|
1523
|
+
}
|
|
1524
|
+
}
|
|
1525
|
+
// 5. Empty buttons
|
|
1526
|
+
const buttons = root.querySelectorAll('button');
|
|
1527
|
+
for (const btn of Array.from(buttons)) {
|
|
1528
|
+
const text = (btn.textContent || '').trim();
|
|
1529
|
+
const aria = btn.getAttribute('aria-label');
|
|
1530
|
+
if (!text && !aria) {
|
|
1531
|
+
a11yViolations.push({ type: 'empty-button', element: elementLabel(btn), details: '' });
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
return {
|
|
1535
|
+
colorScheme: detectPageColorScheme(),
|
|
1536
|
+
spacingViolations: spacingViolations.slice(0, 50),
|
|
1537
|
+
touchTargetViolations: touchTargetViolations.slice(0, 50),
|
|
1538
|
+
contrastViolations: contrastViolations.slice(0, 50),
|
|
1539
|
+
typography: {
|
|
1540
|
+
fontSizes: Array.from(fontSizesSet).slice(0, 20),
|
|
1541
|
+
lineHeights: Array.from(lineHeightsSet).slice(0, 20),
|
|
1542
|
+
letterSpacings: Array.from(letterSpacingsSet).slice(0, 20),
|
|
1543
|
+
},
|
|
1544
|
+
spacingScale: [...new Set(spacingScale)].slice(0, 30),
|
|
1545
|
+
accessibilityViolations: a11yViolations.slice(0, 50),
|
|
1546
|
+
headingStructure,
|
|
1547
|
+
};
|
|
1548
|
+
}, { sel: selector, spacingGrid, minTouchTarget, minContrast });
|
|
1549
|
+
});
|
|
1550
|
+
// Weighted scoring: contrast failures are most serious (accessibility),
|
|
1551
|
+
// touch target issues affect usability, spacing is cosmetic, a11y is significant.
|
|
1552
|
+
// Only count contrast violations where we could resolve the background (bgResolved: true).
|
|
1553
|
+
// Violations with unresolvable backgrounds (backdrop-filter etc.) are excluded from scoring.
|
|
1554
|
+
const resolvedContrastViolations = auditData.contrastViolations.filter(v => v.bgResolved !== false);
|
|
1555
|
+
const unresolvedContrastViolations = auditData.contrastViolations.filter(v => v.bgResolved === false);
|
|
1556
|
+
const contrastPenalty = Math.min(40, resolvedContrastViolations.length * 5); // cap at 40pts
|
|
1557
|
+
const touchPenalty = Math.min(30, auditData.touchTargetViolations.length * 3); // cap at 30pts
|
|
1558
|
+
const spacingPenalty = Math.min(20, auditData.spacingViolations.length * 1);
|
|
1559
|
+
const a11yPenalty = Math.min(30, auditData.accessibilityViolations.length * 4);
|
|
1560
|
+
// Bonus for zero violations in a category (up to 5 pts total)
|
|
1561
|
+
let bonus = 0;
|
|
1562
|
+
if (resolvedContrastViolations.length === 0)
|
|
1563
|
+
bonus += 2;
|
|
1564
|
+
if (auditData.touchTargetViolations.length === 0)
|
|
1565
|
+
bonus += 1;
|
|
1566
|
+
if (auditData.accessibilityViolations.length === 0)
|
|
1567
|
+
bonus += 2;
|
|
1568
|
+
const totalPenalty = contrastPenalty + touchPenalty + spacingPenalty + a11yPenalty;
|
|
1569
|
+
const score = Math.min(100, Math.max(0, Math.round(100 - totalPenalty + bonus)));
|
|
1570
|
+
const parts = [];
|
|
1571
|
+
if (auditData.spacingViolations.length > 0)
|
|
1572
|
+
parts.push(`${auditData.spacingViolations.length} spacing violation(s)`);
|
|
1573
|
+
if (auditData.touchTargetViolations.length > 0)
|
|
1574
|
+
parts.push(`${auditData.touchTargetViolations.length} touch target violation(s)`);
|
|
1575
|
+
if (resolvedContrastViolations.length > 0)
|
|
1576
|
+
parts.push(`${resolvedContrastViolations.length} contrast violation(s)`);
|
|
1577
|
+
if (unresolvedContrastViolations.length > 0)
|
|
1578
|
+
parts.push(`${unresolvedContrastViolations.length} unresolvable contrast check(s)`);
|
|
1579
|
+
if (auditData.accessibilityViolations.length > 0)
|
|
1580
|
+
parts.push(`${auditData.accessibilityViolations.length} accessibility violation(s)`);
|
|
1581
|
+
const summary = parts.length === 0
|
|
1582
|
+
? 'No design violations found.'
|
|
1583
|
+
: `Found: ${parts.join(', ')}.`;
|
|
1584
|
+
const audit = { score, summary, ...auditData };
|
|
1585
|
+
return { audit, finalUrl };
|
|
1586
|
+
}
|
|
1587
|
+
// ── browserDesignAnalysis ──────────────────────────────────────────────────────
|
|
1588
|
+
/**
 * Load a URL in a browser page and run the design-analysis extractor on it.
 *
 * The extractor (`extractDesignAnalysis` from `./design-analysis.js`) is
 * imported lazily on each call and is invoked with the page supplied by
 * `withBrowserPage`. Its result is a DesignAnalysis object describing
 * effects, palette, layout, type scale, and quality signals.
 *
 * @param {string} url - Address of the page to analyse.
 * @param {object} [options] - Page settings forwarded to `withBrowserPage`.
 *   Defaults applied here: `width` 1440, `height` 900, `waitMs` 0,
 *   `timeoutMs` 60000, `stealth` false. `userAgent`, `headers` and `cookies`
 *   are passed through as given.
 * @returns {Promise<{analysis: object, finalUrl: string}>} The extractor's
 *   output together with the `finalUrl` reported by `withBrowserPage`.
 */
export async function browserDesignAnalysis(url, options = {}) {
    const {
        width = 1440,
        height = 900,
        waitMs = 0,
        timeoutMs = 60000,
        userAgent,
        headers,
        cookies,
        stealth = false,
    } = options;
    // Deferred import: the analysis module is only loaded when this entry point is used.
    const { extractDesignAnalysis: runExtractor } = await import('./design-analysis.js');
    const pageSettings = { width, height, userAgent, headers, cookies, stealth, waitMs, timeoutMs };
    const outcome = await withBrowserPage(url, pageSettings, async (page) => runExtractor(page));
    const { result: analysis, finalUrl } = outcome;
    return { analysis, finalUrl };
}
|