@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser lifecycle & page pool management.
|
|
3
|
+
* Handles Playwright loading, browser instances, and the idle page pool.
|
|
4
|
+
*/
|
|
5
|
+
import type { Browser, Page } from 'playwright';
|
|
6
|
+
/** Type of the `chromium` export of the `playwright` module (resolved type-only, no runtime import). */
type ChromiumType = typeof import('playwright').chromium;
|
|
7
|
+
import { closePool } from './http-fetch.js';
|
|
8
|
+
export { closePool };
|
|
9
|
+
/**
|
|
10
|
+
* Checks whether the Chromium binary bundled with our playwright alias exists.
|
|
11
|
+
* If missing, auto-installs it using `npx playwright install chromium` (which
|
|
12
|
+
* resolves to rebrowser-playwright because of the package.json alias).
|
|
13
|
+
*
|
|
14
|
+
* Call this once before the first browser launch so users get a helpful message
|
|
15
|
+
* instead of a confusing "Executable doesn't exist" stack trace.
|
|
16
|
+
*/
|
|
17
|
+
export declare function ensureChromiumInstalled(): Promise<void>;
|
|
18
|
+
/** Whether Playwright has been loaded (for diagnostics). */
|
|
19
|
+
export declare let playwrightLoaded: boolean;
|
|
20
|
+
/**
 * Resolves to a Chromium browser-type object typed like `playwright`'s
 * `chromium` export (see `ChromiumType`).
 * NOTE(review): going by the name this is presumably the stealth-enabled
 * variant; the implementation is not visible in this declaration file — confirm.
 */
export declare function getStealthPlaywright(): Promise<ChromiumType>;
|
|
21
|
+
/**
|
|
22
|
+
* Returns a realistic Chrome user agent.
|
|
23
|
+
* Delegates to the curated user-agents module so stealth mode never exposes
|
|
24
|
+
* the default "Chrome for Testing" UA which is a reliable bot-detection signal.
|
|
25
|
+
*/
|
|
26
|
+
export declare function getRandomUserAgent(): string;
|
|
27
|
+
/**
|
|
28
|
+
* Common Chromium launch arguments for anti-bot-detection.
|
|
29
|
+
* Applied to BOTH regular and stealth browser instances.
|
|
30
|
+
* NOTE: --window-size is intentionally omitted here; it is added dynamically
|
|
31
|
+
* per browser launch using a random realistic viewport (see getRandomViewport()).
|
|
32
|
+
*/
|
|
33
|
+
export declare const ANTI_DETECTION_ARGS: readonly string[];
|
|
34
|
+
/**
|
|
35
|
+
* Returns a random realistic viewport weighted by real-world market share.
|
|
36
|
+
* Used to avoid the telltale Playwright default of 1280×720.
|
|
37
|
+
*/
|
|
38
|
+
export declare function getRandomViewport(): {
|
|
39
|
+
width: number;
|
|
40
|
+
height: number;
|
|
41
|
+
};
|
|
42
|
+
/**
|
|
43
|
+
* Apply stealth init scripts to a page to reduce bot-detection signals:
|
|
44
|
+
* 1. Hides the `window.__pwInitScripts` Playwright leak.
|
|
45
|
+
* 2. Patches `navigator.userAgentData.brands` to include "Google Chrome"
|
|
46
|
+
* (Chrome for Testing only ships "Chromium" which is a known detection signal).
|
|
47
|
+
*/
|
|
48
|
+
export declare function applyStealthScripts(page: Page, languages?: string[]): Promise<void>;
|
|
49
|
+
/**
|
|
50
|
+
* Check current process memory usage against the pod limit.
|
|
51
|
+
* Returns { ok: true } when safe to launch a new browser context.
|
|
52
|
+
* Returns { ok: false } when memory is too high — caller should skip browser rendering.
|
|
53
|
+
*/
|
|
54
|
+
export declare function checkMemoryPressure(): {
|
|
55
|
+
ok: boolean;
|
|
56
|
+
rss: number;
|
|
57
|
+
heapUsed: number;
|
|
58
|
+
limit: number;
|
|
59
|
+
};
|
|
60
|
+
/**
 * NOTE(review): presumably the upper bound on simultaneously open browser pages.
 * The value and where it is enforced live in the implementation — confirm.
 */
export declare const MAX_CONCURRENT_PAGES: number;
|
|
61
|
+
/**
 * NOTE(review): presumably the target number of idle pages kept in the page pool.
 * The value is set in the implementation — confirm.
 */
export declare const PAGE_POOL_SIZE: number;
|
|
62
|
+
/**
 * NOTE(review): presumably drops `page` from the idle page pool. Whether the
 * page is also closed is not visible in this declaration file — confirm.
 */
export declare function removePooledPage(page: Page): void;
|
|
63
|
+
/**
 * Synchronously returns a `Page`, or `null`.
 * NOTE(review): presumably hands out an idle page from the pool and yields
 * `null` when none is available — confirm against the implementation.
 */
export declare function takePooledPage(): Page | null;
|
|
64
|
+
/** Returns the current number of pooled pages (for size checks in browser-fetch). */
|
|
65
|
+
export declare function getPooledPagesCount(): number;
|
|
66
|
+
/**
 * NOTE(review): presumably tops up the idle page pool. Which browser is used
 * when `browser` is omitted is not visible in this declaration file — confirm.
 */
export declare function ensurePagePool(browser?: Browser): Promise<void>;
|
|
67
|
+
/**
 * NOTE(review): presumably returns `page` to the idle pool for reuse. Any
 * reset or close-on-overflow behaviour is not visible here — confirm.
 */
export declare function recyclePooledPage(page: Page): Promise<void>;
|
|
68
|
+
/**
 * Resolves to a Playwright `Browser`.
 * NOTE(review): presumably the shared non-stealth instance, launched on first
 * use — confirm against the implementation.
 */
export declare function getBrowser(): Promise<Browser>;
|
|
69
|
+
/**
 * Resolves to a Playwright `Browser`.
 * NOTE(review): presumably the shared stealth-mode instance — confirm against
 * the implementation.
 */
export declare function getStealthBrowser(): Promise<Browser>;
|
|
70
|
+
/**
|
|
71
|
+
* Get or create a browser instance with a persistent user data directory.
|
|
72
|
+
* Profile browsers bypass the shared browser pool so cookies/sessions survive
|
|
73
|
+
* between fetch calls.
|
|
74
|
+
*
|
|
75
|
+
* @param profileDir Absolute path to the Chrome user-data-dir directory
|
|
76
|
+
* @param headed Whether to launch in headed (visible) mode
|
|
77
|
+
* @param stealth Whether to use playwright-extra stealth instead of plain chromium
|
|
78
|
+
*/
|
|
79
|
+
export declare function getProfileBrowser(profileDir: string, headed?: boolean, stealth?: boolean): Promise<Browser>;
|
|
80
|
+
/**
 * NOTE(review): presumably performs start-up work ahead of the first fetch
 * (e.g. pre-launching a browser). What is warmed is not visible here — confirm.
 */
export declare function warmup(): Promise<void>;
|
|
81
|
+
/**
|
|
82
|
+
* Clean up browser resources (shared pool, stealth browser, and all profile browsers).
|
|
83
|
+
*/
|
|
84
|
+
export declare function cleanup(): Promise<void>;
|
|
85
|
+
/**
|
|
86
|
+
* Close a specific persistent profile browser (e.g. when done with a session).
|
|
87
|
+
* Safe to call even if the browser has already been closed.
|
|
88
|
+
*
|
|
89
|
+
* @param profileDir Path to the profile directory used when launching
|
|
90
|
+
*/
|
|
91
|
+
export declare function closeProfileBrowser(profileDir: string): Promise<void>;
|
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser lifecycle & page pool management.
|
|
3
|
+
* Handles Playwright loading, browser instances, and the idle page pool.
|
|
4
|
+
*/
|
|
5
|
+
import { getRealisticUserAgent } from './user-agents.js';
|
|
6
|
+
import { startDnsWarmup } from './dns-cache.js';
|
|
7
|
+
import { closePool } from './http-fetch.js';
|
|
8
|
+
import { existsSync } from 'fs';
|
|
9
|
+
import { execSync } from 'child_process';
|
|
10
|
+
// Re-export closePool so fetcher.ts can barrel it from this module.
|
|
11
|
+
export { closePool };
|
|
12
|
+
// ── Browser auto-install ──────────────────────────────────────────────────────
|
|
13
|
+
/**
 * Checks whether the Chromium binary bundled with our playwright alias exists.
 * If missing, auto-installs it using `npx playwright install chromium` (which
 * resolves to rebrowser-playwright because of the package.json alias).
 *
 * Call this once before the first browser launch so users get a helpful message
 * instead of a confusing "Executable doesn't exist" stack trace.
 *
 * Progress and failure messages are written to stderr.
 *
 * @returns Resolves with no value when the binary already exists, when the
 *   install succeeded, or when `playwright` cannot be imported at all (the
 *   later launch error is left to surface in that case).
 * @throws {Error} When the automatic install fails; the message tells the
 *   user which command to run manually.
 */
export async function ensureChromiumInstalled() {
    // Dynamically get the expected executable path from our playwright alias
    let execPath;
    try {
        const pw = await import('playwright');
        execPath = pw.chromium.executablePath();
    }
    catch {
        // If playwright itself can't be imported we'll let the launch error surface
        return;
    }
    if (existsSync(execPath)) {
        return; // Already installed — fast path
    }
    // Binary is missing — auto-install with visible feedback
    console.error('\x1b[33m⚙️ Installing browser for first-time use (this takes ~30s)...\x1b[0m');
    console.error('\x1b[90m Running: npx playwright install chromium\x1b[0m');
    try {
        // URL#pathname is percent-encoded and, on Windows, looks like "/C:/...",
        // so it is not a usable directory when the install location contains
        // spaces/non-ASCII characters or lives on a drive letter. Convert the
        // file: URL to a real platform path instead. Loaded lazily because it is
        // only needed on this rare slow path.
        const { fileURLToPath } = await import('url');
        // Resolve npx relative to this package so we always use the aliased
        // rebrowser-playwright, not whatever the user has globally.
        // NOTE(review): '../../..' is resolved against this file's URL, not its
        // directory — confirm it lands on the intended directory.
        const installCwd = fileURLToPath(new URL('../../..', import.meta.url));
        execSync('npx playwright install chromium', {
            stdio: 'inherit',
            cwd: installCwd,
        });
        console.error('\x1b[32m✅ Browser installed successfully.\x1b[0m');
    }
    catch (installErr) {
        const msg = installErr instanceof Error ? installErr.message : String(installErr);
        console.error(`\x1b[31m❌ Auto-install failed: ${msg}\x1b[0m`);
        console.error('\x1b[31m Please install the browser manually:\x1b[0m');
        console.error('\x1b[36m npx playwright install chromium\x1b[0m');
        throw new Error('Chromium is not installed. Run: npx playwright install chromium');
    }
}
|
|
55
|
+
// ── Playwright lazy loading ───────────────────────────────────────────────────
|
|
56
|
+
/** Cached `chromium` browser type from `playwright`; stays null until first use. */
let _chromium = null;
/** Cached `chromium` browser type from `playwright-extra`; stays null until first use. */
let _stealthChromium = null;
/** Whether Playwright has been loaded (for diagnostics). */
export let playwrightLoaded = false;
/**
 * Lazily load the plain Playwright `chromium` browser type.
 *
 * The first call verifies (and if necessary installs) the Chromium binary,
 * imports `playwright`, caches its `chromium` export and flips
 * `playwrightLoaded`. Later calls return the cached value.
 *
 * @returns The cached `chromium` browser type.
 */
async function getPlaywright() {
    if (_chromium) {
        return _chromium;
    }
    await ensureChromiumInstalled();
    const { chromium } = await import('playwright');
    _chromium = chromium;
    playwrightLoaded = true;
    return _chromium;
}
|
|
69
|
+
/**
 * Lazily load the `playwright-extra` `chromium` browser type with the
 * puppeteer-extra stealth plugin registered.
 *
 * The first successful call imports both packages, registers the plugin,
 * caches the browser type and flips `playwrightLoaded`. Later calls return the
 * cached value.
 *
 * @returns The cached stealth-enabled `chromium` browser type.
 */
export async function getStealthPlaywright() {
    if (!_stealthChromium) {
        const pwExtra = await import('playwright-extra');
        const StealthPlugin = (await import('puppeteer-extra-plugin-stealth')).default;
        const stealthChromium = pwExtra.chromium;
        // Register the plugin BEFORE caching. If registration throws, nothing is
        // cached, so the next call retries instead of silently handing out a
        // browser type that has no stealth plugin attached.
        stealthChromium.use(StealthPlugin());
        _stealthChromium = stealthChromium;
        playwrightLoaded = true;
    }
    return _stealthChromium;
}
|
|
79
|
+
// ── User agent & viewport helpers ─────────────────────────────────────────────
|
|
80
|
+
/**
 * Produce a realistic Chrome user-agent string.
 *
 * This is a thin wrapper over `getRealisticUserAgent()` from the curated
 * user-agents module, so stealth mode never exposes the default
 * "Chrome for Testing" UA, which is a reliable bot-detection signal.
 *
 * @returns Whatever `getRealisticUserAgent()` returns.
 */
export function getRandomUserAgent() {
    const userAgent = getRealisticUserAgent();
    return userAgent;
}
|
|
88
|
+
/** General-purpose Chromium switches (automation flag, sandbox, GPU, window state). */
const BASE_LAUNCH_FLAGS = [
    '--disable-blink-features=AutomationControlled',
    '--disable-infobars',
    '--disable-dev-shm-usage',
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-gpu',
    '--start-maximized',
];
/** Chrome branding / stealth hardening switches. */
const BRANDING_HARDENING_FLAGS = [
    '--disable-features=ChromeUserAgentDataBranding,IsolateOrigins,site-per-process',
    '--disable-component-extensions-with-background-pages',
    '--disable-default-apps',
    '--disable-extensions',
    '--disable-hang-monitor',
    '--disable-popup-blocking',
    '--disable-prompt-on-repost',
    '--disable-sync',
    '--metrics-recording-only',
    '--no-first-run',
];
/**
 * Chromium launch arguments shared by every browser this module starts
 * (regular, stealth and profile browsers all spread this list).
 *
 * `--window-size` is deliberately absent: each launch appends its own value
 * derived from a randomly chosen realistic viewport (see getRandomViewport()).
 */
export const ANTI_DETECTION_ARGS = BASE_LAUNCH_FLAGS.concat(BRANDING_HARDENING_FLAGS);
|
|
114
|
+
/**
 * Pick a viewport size at random, weighted so that more common screen
 * resolutions are chosen more often. This avoids the telltale Playwright
 * default of 1280×720.
 *
 * @returns {{ width: number, height: number }} The chosen viewport; falls back
 *   to 1920×1080 if the weighted walk selects nothing.
 */
export function getRandomViewport() {
    // Rows are [width, height, weight]; weights approximate market share.
    const table = [
        [1920, 1080, 35], // Full HD
        [1366, 768, 20], // Laptop
        [1536, 864, 15], // Scaled laptop
        [1440, 900, 10], // MacBook
        [1680, 1050, 8], // Large laptop
        [2560, 1440, 7], // QHD
        [1280, 800, 5], // Older laptop
    ];
    let totalWeight = 0;
    for (const [, , weight] of table) {
        totalWeight += weight;
    }
    // Walk the table, consuming each row's weight until the roll is used up.
    let remaining = Math.random() * totalWeight;
    const chosen = table.find(([, , weight]) => (remaining -= weight) <= 0);
    if (chosen) {
        const [width, height] = chosen;
        return { width, height };
    }
    return { width: 1920, height: 1080 };
}
|
|
138
|
+
/**
|
|
139
|
+
* Apply stealth init scripts to a page to reduce bot-detection signals:
|
|
140
|
+
* 1. Hides the `window.__pwInitScripts` Playwright leak.
|
|
141
|
+
* 2. Patches `navigator.userAgentData.brands` to include "Google Chrome"
|
|
142
|
+
* (Chrome for Testing only ships "Chromium" which is a known detection signal).
|
|
143
|
+
*/
|
|
144
|
+
export async function applyStealthScripts(page, languages) {
|
|
145
|
+
const langList = languages && languages.length > 0 ? languages : ['en-US', 'en'];
|
|
146
|
+
// 1. Hide Playwright's __pwInitScripts marker
|
|
147
|
+
// Uses string form to avoid TypeScript DOM-lib requirements (tsconfig has no DOM lib).
|
|
148
|
+
await page.addInitScript(`
|
|
149
|
+
Object.defineProperty(window, '__pwInitScripts', {
|
|
150
|
+
get: () => undefined,
|
|
151
|
+
set: () => {},
|
|
152
|
+
configurable: true,
|
|
153
|
+
});
|
|
154
|
+
`);
|
|
155
|
+
// 2. Patch userAgentData brands to include "Google Chrome"
|
|
156
|
+
// Chrome for Testing only ships "Chromium" — a well-known bot-detection signal.
|
|
157
|
+
await page.addInitScript(`
|
|
158
|
+
(function () {
|
|
159
|
+
var uad = navigator.userAgentData;
|
|
160
|
+
if (!uad) return;
|
|
161
|
+
var originalBrands = uad.brands || [];
|
|
162
|
+
var hasChromeEntry = originalBrands.some(function(b) { return b.brand === 'Google Chrome'; });
|
|
163
|
+
if (hasChromeEntry) return;
|
|
164
|
+
|
|
165
|
+
var chromiumEntry = originalBrands.find(function(b) { return b.brand === 'Chromium'; });
|
|
166
|
+
var version = (chromiumEntry && chromiumEntry.version) || '136';
|
|
167
|
+
var patchedBrands = [
|
|
168
|
+
{ brand: 'Chromium', version: version },
|
|
169
|
+
{ brand: 'Google Chrome', version: version },
|
|
170
|
+
{ brand: 'Not=A?Brand', version: '99' },
|
|
171
|
+
];
|
|
172
|
+
|
|
173
|
+
Object.defineProperty(navigator, 'userAgentData', {
|
|
174
|
+
get: function() {
|
|
175
|
+
return {
|
|
176
|
+
brands: patchedBrands,
|
|
177
|
+
mobile: false,
|
|
178
|
+
platform: uad.platform || 'Windows',
|
|
179
|
+
getHighEntropyValues: uad.getHighEntropyValues ? uad.getHighEntropyValues.bind(uad) : undefined,
|
|
180
|
+
toJSON: function() {
|
|
181
|
+
return {
|
|
182
|
+
brands: patchedBrands,
|
|
183
|
+
mobile: false,
|
|
184
|
+
platform: uad.platform || 'Windows',
|
|
185
|
+
};
|
|
186
|
+
},
|
|
187
|
+
};
|
|
188
|
+
},
|
|
189
|
+
configurable: true,
|
|
190
|
+
});
|
|
191
|
+
})();
|
|
192
|
+
`);
|
|
193
|
+
// 3. Hide navigator.webdriver (THE #1 BOT SIGNAL)
|
|
194
|
+
await page.addInitScript(`
|
|
195
|
+
Object.defineProperty(navigator, 'webdriver', {
|
|
196
|
+
get: () => false,
|
|
197
|
+
configurable: true,
|
|
198
|
+
});
|
|
199
|
+
try { delete Object.getPrototypeOf(navigator).webdriver; } catch (e) {}
|
|
200
|
+
`);
|
|
201
|
+
// 4. Fake navigator.plugins (empty = bot signal, real Chrome has plugins)
|
|
202
|
+
await page.addInitScript(`
|
|
203
|
+
Object.defineProperty(navigator, 'plugins', {
|
|
204
|
+
get: () => {
|
|
205
|
+
var arr = [
|
|
206
|
+
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
|
|
207
|
+
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
|
|
208
|
+
{ name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
|
|
209
|
+
];
|
|
210
|
+
arr.item = function(i) { return arr[i] || null; };
|
|
211
|
+
arr.namedItem = function(n) { return arr.find(function(p) { return p.name === n; }) || null; };
|
|
212
|
+
arr.refresh = function() {};
|
|
213
|
+
return arr;
|
|
214
|
+
},
|
|
215
|
+
configurable: true,
|
|
216
|
+
});
|
|
217
|
+
`);
|
|
218
|
+
// 5. Fake navigator.languages (use provided language preferences or default to en-US)
|
|
219
|
+
const langJson = JSON.stringify(langList);
|
|
220
|
+
await page.addInitScript(`Object.defineProperty(navigator, 'languages', { get: () => ${langJson}, configurable: true });`);
|
|
221
|
+
// 6. Fake window.chrome object (missing in headless = detected)
|
|
222
|
+
await page.addInitScript(`
|
|
223
|
+
if (!window.chrome) {
|
|
224
|
+
window.chrome = {
|
|
225
|
+
app: {
|
|
226
|
+
isInstalled: false,
|
|
227
|
+
InstallState: { INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' },
|
|
228
|
+
RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' }
|
|
229
|
+
},
|
|
230
|
+
runtime: {
|
|
231
|
+
OnInstalledReason: {}, OnRestartRequiredReason: {}, PlatformArch: {},
|
|
232
|
+
PlatformNaclArch: {}, PlatformOs: {}, RequestUpdateCheckStatus: {},
|
|
233
|
+
connect: function() {}, sendMessage: function() {}
|
|
234
|
+
},
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
`);
|
|
238
|
+
// 7. Fix permissions query (notifications should be 'prompt' not 'denied')
|
|
239
|
+
await page.addInitScript(`
|
|
240
|
+
try {
|
|
241
|
+
var originalQuery = window.Permissions && window.Permissions.prototype && window.Permissions.prototype.query;
|
|
242
|
+
if (originalQuery) {
|
|
243
|
+
window.Permissions.prototype.query = function(params) {
|
|
244
|
+
if (params && params.name === 'notifications') {
|
|
245
|
+
return Promise.resolve({ state: Notification.permission });
|
|
246
|
+
}
|
|
247
|
+
return originalQuery.call(this, params);
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
} catch (e) {}
|
|
251
|
+
`);
|
|
252
|
+
// 8. WebGL vendor/renderer spoofing (headless shows "Google SwiftShader")
|
|
253
|
+
await page.addInitScript(`
|
|
254
|
+
try {
|
|
255
|
+
var getParameter = WebGLRenderingContext.prototype.getParameter;
|
|
256
|
+
WebGLRenderingContext.prototype.getParameter = function(parameter) {
|
|
257
|
+
if (parameter === 37445) return 'Intel Inc.';
|
|
258
|
+
if (parameter === 37446) return 'Intel Iris OpenGL Engine';
|
|
259
|
+
return getParameter.call(this, parameter);
|
|
260
|
+
};
|
|
261
|
+
if (typeof WebGL2RenderingContext !== 'undefined') {
|
|
262
|
+
var getParameter2 = WebGL2RenderingContext.prototype.getParameter;
|
|
263
|
+
WebGL2RenderingContext.prototype.getParameter = function(parameter) {
|
|
264
|
+
if (parameter === 37445) return 'Intel Inc.';
|
|
265
|
+
if (parameter === 37446) return 'Intel Iris OpenGL Engine';
|
|
266
|
+
return getParameter2.call(this, parameter);
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
} catch (e) {}
|
|
270
|
+
`);
|
|
271
|
+
// 9. Hide automation-related properties
|
|
272
|
+
await page.addInitScript(`
|
|
273
|
+
try { Object.defineProperty(document, '$cdc_asdjflasutopfhvcZLmcfl_', { get: () => undefined }); } catch (e) {}
|
|
274
|
+
try { delete window.callPhantom; } catch (e) {}
|
|
275
|
+
try { delete window._phantom; } catch (e) {}
|
|
276
|
+
try { delete window.__nightmare; } catch (e) {}
|
|
277
|
+
`);
|
|
278
|
+
}
|
|
279
|
+
// ── Memory pressure guard ─────────────────────────────────────────────────────
|
|
280
|
+
/** Limit used when MEMORY_LIMIT_MB is unset or unusable. */
const DEFAULT_MEMORY_LIMIT_MB = 768;
/**
 * Parse the memory limit from an environment-style string.
 *
 * @param raw Raw value (may be undefined or empty).
 * @returns The parsed base-10 integer when it is a positive finite number,
 *   otherwise DEFAULT_MEMORY_LIMIT_MB. Without this guard a value such as
 *   "abc" parses to NaN, every `rss < limit` comparison is false, and browser
 *   launches would be refused permanently.
 */
function parseMemoryLimitMb(raw) {
    const parsed = parseInt(raw || '', 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_MEMORY_LIMIT_MB;
}
const MEMORY_LIMIT_MB = parseMemoryLimitMb(process.env.MEMORY_LIMIT_MB); // Env-tunable guardrail for production pods
/**
 * Compare the current process RSS against MEMORY_LIMIT_MB.
 *
 * @returns {{ ok: boolean, rss: number, heapUsed: number, limit: number }}
 *   `ok` is true when the rounded RSS (in MB) is strictly below the limit;
 *   `rss` and `heapUsed` are rounded megabytes; `limit` echoes the limit.
 *   Callers should skip browser rendering when `ok` is false.
 */
export function checkMemoryPressure() {
    const mem = process.memoryUsage();
    const rssMB = Math.round(mem.rss / 1024 / 1024);
    const heapMB = Math.round(mem.heapUsed / 1024 / 1024);
    return {
        ok: rssMB < MEMORY_LIMIT_MB,
        rss: rssMB,
        heapUsed: heapMB,
        limit: MEMORY_LIMIT_MB,
    };
}
|
|
297
|
+
// ── Page pool constants & state ───────────────────────────────────────────────
|
|
298
|
+
/**
 * Parse an integer pool setting from an environment-style string.
 *
 * @param raw Raw value (may be undefined or empty).
 * @param fallback Value used when `raw` is missing, empty or not a number.
 * @returns The parsed base-10 integer, or `fallback` when parsing yields NaN
 *   (a NaN limit would make every `size < limit` comparison false).
 */
function parsePoolSetting(raw, fallback) {
    const parsed = parseInt(raw || String(fallback), 10);
    return Number.isNaN(parsed) ? fallback : parsed;
}
/** Upper bound on concurrently open pages (env MAX_CONCURRENT_PAGES, default 5). */
export const MAX_CONCURRENT_PAGES = parsePoolSetting(process.env.MAX_CONCURRENT_PAGES, 5);
/** Target number of pages kept in the pool (env PAGE_POOL_SIZE, default 3). */
export const PAGE_POOL_SIZE = parsePoolSetting(process.env.PAGE_POOL_SIZE, 3);
/** Shared plain browser instance, or null when none has been launched. */
let sharedBrowser = null;
/** Shared stealth browser instance, or null when none has been launched. */
let sharedStealthBrowser = null;
/** Every page that belongs to the pool, whether idle or currently handed out. */
const pooledPages = new Set();
/** Pool pages that are currently idle and ready to be taken. */
const idlePagePool = [];
/** In-flight pool fill, or null when no fill is running. */
let pagePoolFillPromise = null;
// ── Profile browser instances ─────────────────────────────────────────────────
// Profile browsers are NOT shared — each profileDir gets its own instance.
// These are keyed by profile path and kept alive between fetches in the same process.
const profileBrowsers = new Map();
|
|
309
|
+
// ── Pool helpers ──────────────────────────────────────────────────────────────
|
|
310
|
+
/**
 * Forget a page: drop it from the set of pooled pages and, if it is currently
 * listed as idle, from the idle list as well. The page itself is not closed.
 *
 * @param page Page to remove from pool bookkeeping.
 */
export function removePooledPage(page) {
    pooledPages.delete(page);
    const position = idlePagePool.indexOf(page);
    if (position < 0) {
        return;
    }
    idlePagePool.splice(position, 1);
}
|
|
317
|
+
/**
 * Take the oldest idle page out of the idle list.
 *
 * Pages found to be closed are removed from pool bookkeeping and skipped.
 *
 * @returns The first idle page that is still open, or null when none is left.
 */
export function takePooledPage() {
    while (idlePagePool.length !== 0) {
        const candidate = idlePagePool.shift();
        if (!candidate.isClosed()) {
            return candidate;
        }
        // Closed pages are useless — drop them from the pool and keep looking.
        removePooledPage(candidate);
    }
    return null;
}
|
|
328
|
+
/**
 * Report how many pages the pool currently tracks (idle or handed out).
 * Used for size checks in browser-fetch.
 */
export function getPooledPagesCount() {
    const { size } = pooledPages;
    return size;
}
|
|
332
|
+
/**
 * Top the page pool up to PAGE_POOL_SIZE using the given browser (or the
 * shared browser when none is passed).
 *
 * Does nothing when there is no connected browser. When a fill is already in
 * flight, waits for that fill instead of starting a second one.
 *
 * @param browser Optional browser to create pages on; defaults to the shared browser.
 * @returns {Promise<void>} Resolves when the fill finishes; rejects when
 *   creating or preparing a page fails.
 */
export async function ensurePagePool(browser) {
    const activeBrowser = browser ?? sharedBrowser;
    if (!activeBrowser || !activeBrowser.isConnected()) {
        return;
    }
    if (pagePoolFillPromise) {
        await pagePoolFillPromise;
        return;
    }
    const fill = (async () => {
        while (pooledPages.size < PAGE_POOL_SIZE) {
            const pooledPage = await activeBrowser.newPage({
                userAgent: getRandomUserAgent(),
                viewport: null, // Use browser window size (set via --window-size at launch)
            });
            try {
                await applyStealthScripts(pooledPage);
            }
            catch (err) {
                // The page is not tracked by the pool yet, so nothing else would
                // ever close it — close it here before propagating the failure.
                await pooledPage.close().catch(() => { });
                throw err;
            }
            pooledPages.add(pooledPage);
            idlePagePool.push(pooledPage);
        }
    })().finally(() => {
        // Only clear the slot if it still belongs to this fill. The slot may
        // have been reset and reused by a newer fill in the meantime; clearing
        // it unconditionally would let a further fill run concurrently.
        if (pagePoolFillPromise === fill) {
            pagePoolFillPromise = null;
        }
    });
    pagePoolFillPromise = fill;
    await fill;
}
|
|
356
|
+
/**
 * Hand a page back after use.
 *
 * - A page that is not part of the pool is simply closed.
 * - A pool page that is already closed is dropped from the pool, and a refill
 *   is kicked off when the shared browser is connected.
 * - Otherwise the page is reset (routes removed, cookies cleared, extra
 *   headers cleared, navigated to about:blank) and put back on the idle list.
 *   If the reset throws, the page is dropped from the pool and closed.
 *
 * Afterwards a background refill is started when the shared browser is
 * connected and the pool is below PAGE_POOL_SIZE.
 *
 * @param page Page to recycle.
 */
export async function recyclePooledPage(page) {
    const ignore = () => { };
    if (!pooledPages.has(page)) {
        await page.close().catch(ignore);
        return;
    }
    if (page.isClosed()) {
        removePooledPage(page);
        if (sharedBrowser?.isConnected()) {
            void ensurePagePool(sharedBrowser).catch(ignore);
        }
        return;
    }
    try {
        await page.unroute('**/*').catch(ignore);
        await page.context().clearCookies().catch(ignore);
        await page.setExtraHTTPHeaders({});
        await page.goto('about:blank', { waitUntil: 'domcontentloaded', timeout: 5000 }).catch(ignore);
        const alreadyIdle = idlePagePool.includes(page);
        if (!alreadyIdle) {
            idlePagePool.push(page);
        }
    }
    catch (resetError) {
        // Non-fatal: page reset failed, removing from pool and closing
        if (process.env.DEBUG) {
            const detail = resetError instanceof Error ? resetError.message : resetError;
            console.debug('[webpeel]', 'page reset failed:', detail);
        }
        removePooledPage(page);
        await page.close().catch(ignore);
    }
    if (sharedBrowser?.isConnected() && pooledPages.size < PAGE_POOL_SIZE) {
        void ensurePagePool(sharedBrowser).catch(ignore);
    }
}
|
|
388
|
+
// ── Browser getters ───────────────────────────────────────────────────────────
|
|
389
|
+
/** In-flight launch of the shared browser, shared by concurrent getBrowser() callers. */
let sharedBrowserLaunchPromise = null;
/**
 * Launch a fresh shared browser after checking memory pressure.
 *
 * When memory is above the limit, pooled pages and any existing shared browser
 * are closed first; if memory is still too high afterwards the launch is refused.
 *
 * @returns The newly launched browser (also stored as the shared browser).
 * @throws {Error} When memory is still above the limit after cleanup.
 */
async function launchSharedBrowser() {
    // Memory guard: check pressure before launching a new browser instance
    const memPressure = checkMemoryPressure();
    if (!memPressure.ok) {
        // High memory — attempt to free existing resources first
        console.warn(`[webpeel] Memory pressure detected (${memPressure.rss}MB RSS / ${memPressure.limit}MB limit). Cleaning up browser resources before launch.`);
        // Close pooled pages to free memory
        const pagesToClose = Array.from(pooledPages);
        pooledPages.clear();
        idlePagePool.length = 0;
        pagePoolFillPromise = null;
        await Promise.all(pagesToClose.map((page) => page.close().catch(() => { })));
        // Close shared browser if it exists
        if (sharedBrowser) {
            await sharedBrowser.close().catch(() => { });
            sharedBrowser = null;
        }
        // Re-check after cleanup
        const memAfterCleanup = checkMemoryPressure();
        if (!memAfterCleanup.ok) {
            throw new Error(`[webpeel] Memory still too high after cleanup (${memAfterCleanup.rss}MB / ${memAfterCleanup.limit}MB). Skipping browser rendering to avoid OOM.`);
        }
        // Diagnostics go to stderr (like the warning above) so they cannot mix
        // into payload written to stdout.
        console.warn(`[webpeel] Memory freed: ${memPressure.rss}MB → ${memAfterCleanup.rss}MB. Proceeding with browser launch.`);
    }
    // Any pages still tracked belong to a browser that is gone — reset the pool.
    pooledPages.clear();
    idlePagePool.length = 0;
    pagePoolFillPromise = null;
    const vp = getRandomViewport();
    const pw = await getPlaywright();
    const browser = await pw.launch({
        headless: true,
        args: [...ANTI_DETECTION_ARGS, `--window-size=${vp.width},${vp.height}`],
    });
    sharedBrowser = browser;
    void ensurePagePool(browser).catch(() => { });
    return browser;
}
/**
 * Get the shared headless browser, launching it when there is none or the
 * existing one is no longer connected.
 *
 * Reusing a connected browser also kicks off a background pool refill when
 * the pool is below PAGE_POOL_SIZE. Concurrent callers that arrive while a
 * launch is in progress share that launch instead of each starting (and
 * leaking) their own browser process.
 *
 * @returns The shared browser instance.
 * @throws {Error} When memory pressure prevents a launch, or the launch fails.
 */
export async function getBrowser() {
    // SECURITY: Check if browser is still connected and healthy
    if (sharedBrowser) {
        try {
            if (sharedBrowser.isConnected()) {
                if (pooledPages.size < PAGE_POOL_SIZE) {
                    void ensurePagePool(sharedBrowser).catch(() => { });
                }
                return sharedBrowser;
            }
        }
        catch (e) {
            // Browser is dead, recreate
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'shared browser health check failed, recreating:', e instanceof Error ? e.message : e);
            sharedBrowser = null;
        }
    }
    if (!sharedBrowserLaunchPromise) {
        sharedBrowserLaunchPromise = launchSharedBrowser().finally(() => {
            sharedBrowserLaunchPromise = null;
        });
    }
    return sharedBrowserLaunchPromise;
}
|
|
442
|
+
/** In-flight launch of the stealth browser, shared by concurrent getStealthBrowser() callers. */
let stealthBrowserLaunchPromise = null;
/**
 * Get the shared headless stealth browser, launching it when there is none or
 * the existing one is no longer connected.
 *
 * Concurrent callers that arrive while a launch is in progress share that
 * launch instead of each starting (and leaking) their own browser process.
 *
 * @returns The shared stealth browser instance.
 * @throws {Error} When the launch fails or yields no browser.
 */
export async function getStealthBrowser() {
    // SECURITY: Check if stealth browser is still connected and healthy
    if (sharedStealthBrowser) {
        try {
            if (sharedStealthBrowser.isConnected()) {
                return sharedStealthBrowser;
            }
        }
        catch (e) {
            // Browser is dead, recreate
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'stealth browser health check failed, recreating:', e instanceof Error ? e.message : e);
            sharedStealthBrowser = null;
        }
    }
    if (!stealthBrowserLaunchPromise) {
        stealthBrowserLaunchPromise = (async () => {
            const stealthVp = getRandomViewport();
            const stealthPw = await getStealthPlaywright();
            const stealthBrowser = await stealthPw.launch({
                headless: true,
                args: [...ANTI_DETECTION_ARGS, `--window-size=${stealthVp.width},${stealthVp.height}`],
            });
            if (!stealthBrowser)
                throw new Error('Failed to launch stealth browser');
            sharedStealthBrowser = stealthBrowser;
            return stealthBrowser;
        })().finally(() => {
            stealthBrowserLaunchPromise = null;
        });
    }
    return stealthBrowserLaunchPromise;
}
|
|
468
|
+
/** In-flight profile browser launches, keyed by profile directory. */
const profileBrowserLaunches = new Map();
/**
 * Get or create a browser instance with a persistent user data directory.
 * Profile browsers bypass the shared browser pool so cookies/sessions survive
 * between fetch calls.
 *
 * Instances are cached by `profileDir` only: when a connected browser already
 * exists for that directory it is returned as-is, regardless of the `headed`
 * and `stealth` values passed on this call. Concurrent callers for the same
 * directory share one launch, so two processes are never started against the
 * same profile.
 *
 * NOTE(review): the profile is passed as a `--user-data-dir` launch argument.
 * Upstream Playwright documents `launchPersistentContext(userDataDir)` as the
 * supported route for persistent profiles — confirm `launch()` accepts this
 * argument with the Playwright build in use.
 *
 * @param profileDir Absolute path to the Chrome user-data-dir directory
 * @param headed Whether to launch in headed (visible) mode
 * @param stealth Whether to use playwright-extra stealth instead of plain chromium
 * @returns The browser bound to `profileDir`.
 * @throws {Error} When the launch fails or yields no browser.
 */
export async function getProfileBrowser(profileDir, headed = false, stealth = false) {
    const existing = profileBrowsers.get(profileDir);
    if (existing) {
        try {
            if (existing.isConnected())
                return existing;
        }
        catch (e) {
            // Profile browser is dead, recreate
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'profile browser health check failed, recreating:', e instanceof Error ? e.message : e);
        }
        profileBrowsers.delete(profileDir);
    }
    const pendingLaunch = profileBrowserLaunches.get(profileDir);
    if (pendingLaunch) {
        return pendingLaunch;
    }
    const launch = (async () => {
        const profileVp = getRandomViewport();
        const launchOptions = {
            headless: !headed,
            args: [
                ...ANTI_DETECTION_ARGS,
                `--window-size=${profileVp.width},${profileVp.height}`,
                `--user-data-dir=${profileDir}`,
            ],
        };
        const launched = stealth
            ? await (await getStealthPlaywright()).launch(launchOptions)
            : await (await getPlaywright()).launch(launchOptions);
        if (!launched)
            throw new Error('Failed to launch profile browser');
        profileBrowsers.set(profileDir, launched);
        return launched;
    })().finally(() => {
        profileBrowserLaunches.delete(profileDir);
    });
    profileBrowserLaunches.set(profileDir, launch);
    return launch;
}
|
|
508
|
+
// ── Warmup ────────────────────────────────────────────────────────────────────
|
|
509
|
+
/**
 * Pre-warm the pipeline: start DNS warmup (its return value is not awaited),
 * obtain the shared browser and wait for the page pool fill on it.
 *
 * @returns {Promise<void>}
 */
export async function warmup() {
    startDnsWarmup();
    await ensurePagePool(await getBrowser());
}
|
|
514
|
+
// ── Cleanup ───────────────────────────────────────────────────────────────────
|
|
515
|
+
/**
 * Clean up browser resources: pooled pages, the shared browser, the stealth
 * browser and all profile browsers, then call `closePool()`.
 *
 * Cleanup is best-effort: a failure while closing one resource is ignored so
 * the remaining resources are still released, and module references are
 * cleared before closing so a failed close never leaves a stale handle behind.
 *
 * @returns {Promise<void>}
 */
export async function cleanup() {
    const ignore = () => { };
    const pagesToClose = Array.from(pooledPages);
    pooledPages.clear();
    idlePagePool.length = 0;
    pagePoolFillPromise = null;
    await Promise.all(pagesToClose.map((page) => page.close().catch(ignore)));
    const shared = sharedBrowser;
    sharedBrowser = null;
    if (shared) {
        await shared.close().catch(ignore);
    }
    const stealthShared = sharedStealthBrowser;
    sharedStealthBrowser = null;
    if (stealthShared) {
        await stealthShared.close().catch(ignore);
    }
    // Close all persistent profile browsers
    const profileBrowserList = Array.from(profileBrowsers.values());
    profileBrowsers.clear();
    await Promise.all(profileBrowserList.map(b => b.close().catch(ignore)));
    await closePool().catch(ignore);
}
|
|
538
|
+
/**
 * Shut down the persistent browser registered for one profile directory
 * (e.g. when done with a session).
 *
 * Does nothing when no browser is registered for that directory, and ignores
 * errors from closing, so it is safe to call even if the browser has already
 * been closed.
 *
 * @param profileDir Path to the profile directory used when launching
 * @returns {Promise<void>}
 */
export async function closeProfileBrowser(profileDir) {
    const browser = profileBrowsers.get(profileDir);
    if (!browser) {
        return;
    }
    // Unregister first so callers never receive a browser that is closing.
    profileBrowsers.delete(profileDir);
    await browser.close().catch(() => { });
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart content distillation for WebPeel
|
|
3
|
+
*
|
|
4
|
+
* Intelligently compresses content to fit within a token budget using
|
|
5
|
+
* heuristic-based techniques — no LLM required.
|
|
6
|
+
*
|
|
7
|
+
* This is NOT simple truncation: it prioritises information-dense content
|
|
8
|
+
* and progressively removes lower-value sections while preserving structure.
|
|
9
|
+
*
|
|
10
|
+
* @module budget
|
|
11
|
+
*/
|
|
12
|
+
/** Tokens per listing item used for budget estimation in extract-all mode */
|
|
13
|
+
export declare const TOKENS_PER_LISTING_ITEM = 50;
|
|
14
|
+
/**
|
|
15
|
+
* Distill content to fit within a token budget using smart compression.
|
|
16
|
+
*
|
|
17
|
+
* Strategy (applied progressively until within budget):
|
|
18
|
+
* 1. Remove image markdown — `![alt](url)` → keep meaningful alt text
|
|
19
|
+
* 2. Remove boilerplate sections (cookie banners, nav headings, etc.)
|
|
20
|
+
* 3. Compress tables to MAX_TABLE_ROWS data rows
|
|
21
|
+
* 4. Collapse redundant whitespace
|
|
22
|
+
* 5. Remove low information-density paragraphs
|
|
23
|
+
* 6. Hard-truncate with notice as last resort
|
|
24
|
+
*
|
|
25
|
+
* @param content The content string to distill
|
|
26
|
+
* @param budget Maximum token budget (rough: 1 token ≈ 4 chars)
|
|
27
|
+
* @param format Content format: 'markdown' | 'text' | 'json'
|
|
28
|
+
* @returns Distilled content within the budget
|
|
29
|
+
*/
|
|
30
|
+
export declare function distillToBudget(content: string, budget: number, format: 'markdown' | 'text' | 'json'): string;
|
|
31
|
+
/**
|
|
32
|
+
* Calculate how many listing items fit within a token budget.
|
|
33
|
+
*
|
|
34
|
+
* @param totalItems Total available items
|
|
35
|
+
* @param budget Token budget
|
|
36
|
+
* @returns { maxItems, truncated, totalAvailable }
|
|
37
|
+
*/
|
|
38
|
+
export declare function budgetListings(totalItems: number, budget: number): {
|
|
39
|
+
maxItems: number;
|
|
40
|
+
truncated: boolean;
|
|
41
|
+
totalAvailable: number;
|
|
42
|
+
};
|