@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel Deep Research Agent
|
|
3
|
+
*
|
|
4
|
+
* Autonomously searches the web, fetches top sources, filters content with
|
|
5
|
+
* BM25, optionally follows promising links, and synthesizes a comprehensive
|
|
6
|
+
* report using an LLM.
|
|
7
|
+
*
|
|
8
|
+
* Design principle: orchestrate existing modules (peel, bm25-filter,
|
|
9
|
+
* llm-extract) — don't reinvent anything.
|
|
10
|
+
*/
|
|
11
|
+
// Regex for markdown links: [title](url) — supports https:// and protocol-relative //.
// Group 1 is the link text, group 2 is the URL. The URL stops at the first
// whitespace or ')' so that an optional markdown link title
// (`[text](https://host/path "Title")`) is tolerated but never captured as
// part of the URL.
const LINK_REGEX = /\[([^\]]*)\]\(((?:https?:)?\/\/[^)\s]+)(?:\s+(?:"[^"]*"|'[^']*'))?\s*\)/g;
|
|
13
|
+
/**
 * Resolve a DuckDuckGo (DDG) redirect URL to the actual destination.
 *
 * DDG HTML search wraps result links as
 * `//duckduckgo.com/l/?uddg=https%3A%2F%2Factual-url&rut=...`.
 *
 * @param {string} rawUrl - Absolute or protocol-relative (`//host/...`) URL.
 * @returns {string|null} The decoded `uddg` target when `rawUrl` is a DDG
 *   redirect, the normalised input when it is any other http(s) URL, or
 *   `null` when the input (or the redirect target) is not a parseable
 *   http(s) URL.
 */
function resolveDdgRedirect(rawUrl) {
    try {
        // Normalize protocol-relative URLs so the URL parser accepts them.
        const normalised = rawUrl.startsWith('//') ? `https:${rawUrl}` : rawUrl;
        const parsed = new URL(normalised);
        // Compare the host exactly (or as a subdomain). A substring test would
        // also accept look-alike hosts such as `notduckduckgo.com`.
        const isDdgHost = parsed.hostname === 'duckduckgo.com'
            || parsed.hostname.endsWith('.duckduckgo.com');
        if (isDdgHost && parsed.pathname === '/l/') {
            const target = parsed.searchParams.get('uddg');
            if (target) {
                // The target comes from page content, so only hand back real
                // http(s) URLs. `new URL` throws on garbage, which the outer
                // catch turns into null.
                const targetProtocol = new URL(target).protocol;
                return targetProtocol === 'http:' || targetProtocol === 'https:'
                    ? target
                    : null;
            }
        }
        // Not a DDG redirect — return as-is if it's a real http(s) URL.
        if (normalised.startsWith('http://') || normalised.startsWith('https://')) {
            return normalised;
        }
        return null;
    }
    catch {
        // Unparseable input (or a non-string argument) is treated as "no link".
        return null;
    }
}
|
|
37
|
+
/**
 * Extract unique non-DDG links from markdown content.
 * Handles DDG redirect URLs by extracting the actual destination from the `uddg` param.
 *
 * @param {string} markdown - Markdown text to scan for `[title](url)` links.
 * @param {Set<string>} visitedUrls - URLs already seen. This set is mutated:
 *   every URL returned here is added to it, so repeated calls sharing one set
 *   never return the same URL twice.
 * @returns {Array<{title: string, url: string}>} Newly discovered links in
 *   document order.
 */
function extractLinks(markdown, visitedUrls) {
    const found = [];
    // Build a fresh regex so the scan always starts at index 0, regardless of
    // any `lastIndex` state left on the shared /g pattern.
    const regex = new RegExp(LINK_REGEX.source, 'g');
    for (const [, title, rawUrl] of String(markdown ?? '').matchAll(regex)) {
        if (!rawUrl) {
            continue;
        }
        const resolvedUrl = resolveDdgRedirect(rawUrl);
        if (!resolvedUrl) {
            continue;
        }
        let hostname;
        try {
            hostname = new URL(resolvedUrl).hostname;
        }
        catch {
            // A target that cannot be parsed cannot be fetched either.
            continue;
        }
        // Skip DDG's own pages by host. A substring test on the whole URL
        // would also discard third-party links that merely mention the domain
        // in a path or query string.
        if (hostname === 'duckduckgo.com' || hostname.endsWith('.duckduckgo.com')) {
            continue;
        }
        if (visitedUrls.has(resolvedUrl)) {
            continue;
        }
        found.push({ title: title || '', url: resolvedUrl });
        visitedUrls.add(resolvedUrl);
    }
    return found;
}
|
|
61
|
+
/**
|
|
62
|
+
* Conduct autonomous multi-step web research on a topic.
|
|
63
|
+
*/
|
|
64
|
+
export async function research(options) {
|
|
65
|
+
const { query, maxSources = 5, maxDepth = 1, timeout = 60000, outputFormat = 'report', onProgress, } = options;
|
|
66
|
+
const startTime = Date.now();
|
|
67
|
+
const sources = [];
|
|
68
|
+
const visitedUrls = new Set();
|
|
69
|
+
// Lazy imports so users who don't call research() don't pay the cost
|
|
70
|
+
const { peel } = await import('../index.js');
|
|
71
|
+
const { filterByRelevance, computeRelevanceScore } = await import('./bm25-filter.js');
|
|
72
|
+
// -------------------------------------------------------------------------
|
|
73
|
+
// Phase 1: Search
|
|
74
|
+
// -------------------------------------------------------------------------
|
|
75
|
+
onProgress?.({ phase: 'searching', message: `Searching for: ${query}` });
|
|
76
|
+
let searchUrls = [];
|
|
77
|
+
try {
|
|
78
|
+
// Use the search provider abstraction instead of directly scraping DDG.
|
|
79
|
+
// This picks the best available provider (Serper > Brave > DDG with fallbacks).
|
|
80
|
+
const { getBestSearchProvider } = await import('./search-provider.js');
|
|
81
|
+
const { provider, apiKey } = getBestSearchProvider();
|
|
82
|
+
const searchResults = await provider.searchWeb(query, {
|
|
83
|
+
count: Math.min(maxSources * 2, 10), // Fetch extra for filtering
|
|
84
|
+
apiKey,
|
|
85
|
+
});
|
|
86
|
+
if (searchResults.length > 0) {
|
|
87
|
+
searchUrls = searchResults.map(r => ({ title: r.title, url: r.url }));
|
|
88
|
+
}
|
|
89
|
+
else {
|
|
90
|
+
// Fallback: scrape DDG directly via peel (works locally)
|
|
91
|
+
const searchResult = await peel(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`, {
|
|
92
|
+
format: 'markdown',
|
|
93
|
+
timeout: 10000,
|
|
94
|
+
});
|
|
95
|
+
searchUrls = extractLinks(searchResult.content, visitedUrls);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
catch (e) {
|
|
99
|
+
if (process.env.DEBUG)
|
|
100
|
+
console.debug('[webpeel]', 'search failed:', e instanceof Error ? e.message : e);
|
|
101
|
+
}
|
|
102
|
+
// -------------------------------------------------------------------------
|
|
103
|
+
// Phase 2: Fetch top sources in parallel
|
|
104
|
+
// -------------------------------------------------------------------------
|
|
105
|
+
const sourcesToFetch = searchUrls.slice(0, maxSources);
|
|
106
|
+
onProgress?.({
|
|
107
|
+
phase: 'fetching',
|
|
108
|
+
message: `Fetching ${sourcesToFetch.length} sources`,
|
|
109
|
+
sourcesFound: searchUrls.length,
|
|
110
|
+
});
|
|
111
|
+
const fetchPromises = sourcesToFetch.map(async ({ title, url }) => {
|
|
112
|
+
try {
|
|
113
|
+
// Guard: bail if we've burned more than 70% of the time budget
|
|
114
|
+
if (Date.now() - startTime > timeout * 0.7)
|
|
115
|
+
return null;
|
|
116
|
+
const result = await peel(url, {
|
|
117
|
+
format: 'markdown',
|
|
118
|
+
timeout: 15000,
|
|
119
|
+
budget: 3000,
|
|
120
|
+
});
|
|
121
|
+
// Phase 3 (inline): BM25 filter content to query + compute relevance
|
|
122
|
+
const filtered = filterByRelevance(result.content, { query });
|
|
123
|
+
const relevance = computeRelevanceScore(result.content, query);
|
|
124
|
+
return {
|
|
125
|
+
url,
|
|
126
|
+
title: result.title || title || url,
|
|
127
|
+
findings: filtered.content.slice(0, 4000),
|
|
128
|
+
relevance,
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
catch {
|
|
132
|
+
return null;
|
|
133
|
+
}
|
|
134
|
+
});
|
|
135
|
+
const fetchResults = await Promise.allSettled(fetchPromises);
|
|
136
|
+
for (const r of fetchResults) {
|
|
137
|
+
if (r.status === 'fulfilled' && r.value) {
|
|
138
|
+
sources.push(r.value);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
// Sort by relevance (descending)
|
|
142
|
+
sources.sort((a, b) => b.relevance - a.relevance);
|
|
143
|
+
onProgress?.({
|
|
144
|
+
phase: 'extracting',
|
|
145
|
+
message: `Extracted content from ${sources.length} sources`,
|
|
146
|
+
sourcesFetched: sources.length,
|
|
147
|
+
});
|
|
148
|
+
// -------------------------------------------------------------------------
|
|
149
|
+
// Phase 4: Follow promising links (only when maxDepth > 1)
|
|
150
|
+
// -------------------------------------------------------------------------
|
|
151
|
+
if (maxDepth > 1 && sources.length > 0 && Date.now() - startTime < timeout * 0.5) {
|
|
152
|
+
onProgress?.({ phase: 'following', message: 'Following promising links for deeper research' });
|
|
153
|
+
const topSources = sources.slice(0, 2);
|
|
154
|
+
for (const source of topSources) {
|
|
155
|
+
const linkedUrls = extractLinks(source.findings, visitedUrls).slice(0, 2);
|
|
156
|
+
for (const { url: followUrl } of linkedUrls) {
|
|
157
|
+
if (Date.now() - startTime > timeout * 0.7)
|
|
158
|
+
break;
|
|
159
|
+
try {
|
|
160
|
+
const followResult = await peel(followUrl, {
|
|
161
|
+
format: 'markdown',
|
|
162
|
+
timeout: 10000,
|
|
163
|
+
budget: 2000,
|
|
164
|
+
});
|
|
165
|
+
const filtered = filterByRelevance(followResult.content, { query });
|
|
166
|
+
const followRelevance = computeRelevanceScore(followResult.content, query);
|
|
167
|
+
sources.push({
|
|
168
|
+
url: followUrl,
|
|
169
|
+
title: followResult.title || followUrl,
|
|
170
|
+
findings: filtered.content.slice(0, 3000),
|
|
171
|
+
// Slightly lower weight for follow-up links
|
|
172
|
+
relevance: followRelevance * 0.8,
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
catch (e) {
|
|
176
|
+
if (process.env.DEBUG)
|
|
177
|
+
console.debug('[webpeel]', 'followup fetch failed:', e instanceof Error ? e.message : e);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
sources.sort((a, b) => b.relevance - a.relevance);
|
|
182
|
+
}
|
|
183
|
+
// -------------------------------------------------------------------------
|
|
184
|
+
// Phase 5: Synthesize
|
|
185
|
+
// -------------------------------------------------------------------------
|
|
186
|
+
let report = '';
|
|
187
|
+
let tokensUsed;
|
|
188
|
+
let cost;
|
|
189
|
+
const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY;
|
|
190
|
+
if (outputFormat === 'report' && apiKey && sources.length > 0) {
|
|
191
|
+
onProgress?.({ phase: 'synthesizing', message: 'Synthesizing research report' });
|
|
192
|
+
const model = options.model ?? 'gpt-4o-mini';
|
|
193
|
+
const baseUrl = options.baseUrl ?? 'https://api.openai.com/v1';
|
|
194
|
+
const sourceSummaries = sources
|
|
195
|
+
.slice(0, 8)
|
|
196
|
+
.map((s, i) => `Source ${i + 1}: ${s.title}\nURL: ${s.url}\nRelevance: ${Math.round(s.relevance * 100)}%\n\n${s.findings.slice(0, 2000)}`)
|
|
197
|
+
.join('\n\n---\n\n');
|
|
198
|
+
const synthPrompt = `Based on the following web research sources, write a comprehensive research report answering this question:
|
|
199
|
+
|
|
200
|
+
"${query}"
|
|
201
|
+
|
|
202
|
+
Sources:
|
|
203
|
+
${sourceSummaries}
|
|
204
|
+
|
|
205
|
+
Instructions:
|
|
206
|
+
1. Synthesize information from ALL relevant sources
|
|
207
|
+
2. Include specific data points, numbers, and facts
|
|
208
|
+
3. Cite sources using [Source N] format
|
|
209
|
+
4. If sources disagree, note the conflicting information
|
|
210
|
+
5. End with a "Sources" section listing all URLs
|
|
211
|
+
6. Write in markdown format
|
|
212
|
+
7. Be thorough but concise`;
|
|
213
|
+
try {
|
|
214
|
+
const response = await fetch(`${baseUrl}/chat/completions`, {
|
|
215
|
+
method: 'POST',
|
|
216
|
+
headers: {
|
|
217
|
+
'Content-Type': 'application/json',
|
|
218
|
+
Authorization: `Bearer ${apiKey}`,
|
|
219
|
+
},
|
|
220
|
+
body: JSON.stringify({
|
|
221
|
+
model,
|
|
222
|
+
messages: [
|
|
223
|
+
{
|
|
224
|
+
role: 'system',
|
|
225
|
+
content: 'You are a research analyst. Produce well-structured, factual research reports based on the provided web sources. Always cite your sources.',
|
|
226
|
+
},
|
|
227
|
+
{ role: 'user', content: synthPrompt },
|
|
228
|
+
],
|
|
229
|
+
temperature: 0.3,
|
|
230
|
+
max_tokens: 4000,
|
|
231
|
+
}),
|
|
232
|
+
signal: AbortSignal.timeout(30000),
|
|
233
|
+
});
|
|
234
|
+
if (response.ok) {
|
|
235
|
+
const data = (await response.json());
|
|
236
|
+
report = data.choices?.[0]?.message?.content ?? '';
|
|
237
|
+
tokensUsed = {
|
|
238
|
+
input: data.usage?.prompt_tokens ?? 0,
|
|
239
|
+
output: data.usage?.completion_tokens ?? 0,
|
|
240
|
+
};
|
|
241
|
+
const { estimateCost } = await import('./llm-extract.js');
|
|
242
|
+
cost = estimateCost(model, tokensUsed.input, tokensUsed.output);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
catch (e) {
|
|
246
|
+
if (process.env.DEBUG)
|
|
247
|
+
console.debug('[webpeel]', 'llm synthesis failed:', e instanceof Error ? e.message : e);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
// If synthesis wasn't attempted or failed, produce raw sources report
|
|
251
|
+
if (!report) {
|
|
252
|
+
report = sources
|
|
253
|
+
.map((s, i) => `## Source ${i + 1}: ${s.title}\n**URL:** ${s.url}\n**Relevance:** ${Math.round(s.relevance * 100)}%\n\n${s.findings.slice(0, 2000)}`)
|
|
254
|
+
.join('\n\n---\n\n');
|
|
255
|
+
}
|
|
256
|
+
return {
|
|
257
|
+
report,
|
|
258
|
+
sources: sources.map(s => ({
|
|
259
|
+
url: s.url,
|
|
260
|
+
title: s.title,
|
|
261
|
+
findings: s.findings.slice(0, 500),
|
|
262
|
+
relevance: s.relevance,
|
|
263
|
+
})),
|
|
264
|
+
totalSourcesFound: searchUrls.length,
|
|
265
|
+
sourcesConsulted: sources.length,
|
|
266
|
+
elapsed: Date.now() - startTime,
|
|
267
|
+
tokensUsed,
|
|
268
|
+
cost,
|
|
269
|
+
};
|
|
270
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart retry with exponential backoff and jitter.
|
|
3
|
+
* Inspired by Crawl4AI's RateLimiter — the cleanest implementation found.
|
|
4
|
+
*
|
|
5
|
+
* Features:
|
|
6
|
+
* - Exponential backoff with ±25% jitter (prevents thundering herd)
|
|
7
|
+
* - Per-domain delay tracking (optional)
|
|
8
|
+
* - Success reduces delay by 25% (gradual recovery)
|
|
9
|
+
* - Configurable retry predicate
|
|
10
|
+
*/
|
|
11
|
+
export interface RetryOptions {
    /** How many times to retry after the initial attempt. Defaults to 3. */
    maxRetries?: number;
    /** Delay before the first retry, in milliseconds. Defaults to 1000. */
    baseDelayMs?: number;
    /** Upper bound applied to every computed delay, in milliseconds. Defaults to 30000. */
    maxDelayMs?: number;
    /** When true (the default), each delay is scaled by a random factor of ±25%. */
    jitter?: boolean;
    /**
     * Decides whether a failed attempt should be retried.
     * Receives the error and the zero-based index of the attempt that failed.
     * Defaults to the package's `isRetryable` check.
     */
    retryOn?: (error: Error, attempt: number) => boolean;
    /**
     * Hook invoked just before sleeping for a retry (logging/metrics).
     * Receives the error, the one-based number of the failed attempt and the delay in ms.
     */
    onRetry?: (error: Error, attempt: number, delayMs: number) => void;
    /** Name used for the operation in log messages. */
    label?: string;
}
|
|
27
|
+
/**
 * Run `fn`, retrying failed attempts according to `options`.
 * Rejects with the last error once the retry budget is used up.
 */
export declare function withRetry<T>(fn: () => Promise<T>, options?: RetryOptions): Promise<T>;
|
|
32
|
+
/**
 * Tracks an adaptive delay per target domain.
 * The delay grows when a domain answers with a rate-limit status and
 * shrinks again on other responses.
 */
export declare class DomainRateLimiter {
    private domains;
    private baseDelay;
    private maxDelay;
    private rateLimitCodes;
    constructor(options?: {
        baseDelay?: number;
        maxDelay?: number;
        rateLimitCodes?: number[];
    });
    /** Extracts the hostname from a URL. */
    private getDomain;
    /** Resolves once a request to this URL's domain is allowed to proceed. */
    throttle(url: string): Promise<void>;
    /** Feeds a response status code back into the domain's delay state. */
    recordResult(url: string, statusCode: number): void;
    /** Snapshot of the current delay and failure count per domain, for diagnostics. */
    getStats(): Record<string, {
        delay: number;
        failCount: number;
    }>;
}
/** Shared limiter instance used by the fetch layer. */
export declare const domainLimiter: DomainRateLimiter;
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart retry with exponential backoff and jitter.
|
|
3
|
+
* Inspired by Crawl4AI's RateLimiter — the cleanest implementation found.
|
|
4
|
+
*
|
|
5
|
+
* Features:
|
|
6
|
+
* - Exponential backoff with ±25% jitter (prevents thundering herd)
|
|
7
|
+
* - Per-domain delay tracking (optional)
|
|
8
|
+
* - Success reduces delay by 25% (gradual recovery)
|
|
9
|
+
* - Configurable retry predicate
|
|
10
|
+
*/
|
|
11
|
+
import { isRetryable } from '../errors.js';
|
|
12
|
+
import { createLogger } from './logger.js';
|
|
13
|
+
const log = createLogger('retry');
|
|
14
|
+
/**
 * Execute a function with retry logic (exponential backoff, optional jitter).
 *
 * `fn` is always invoked at least once. A negative, NaN or non-numeric
 * `maxRetries` is treated as "no retries" instead of skipping the call.
 *
 * @param fn Async operation to run.
 * @param options Retry tuning:
 *   - `maxRetries` (default 3): retries allowed after the first attempt.
 *   - `baseDelayMs` (default 1000): delay before the first retry; doubles after each retry.
 *   - `maxDelayMs` (default 30000): cap applied to every delay.
 *   - `jitter` (default true): scale each delay by a random factor in [0.75, 1.25).
 *   - `retryOn(error, attempt)`: return false to stop retrying (default: `isRetryable`).
 *   - `onRetry(error, attemptNumber, delayMs)`: called before each sleep.
 *   - `label` (default 'operation'): name used in log lines.
 * @returns The value `fn` resolved with.
 * @throws The last error once retries are exhausted or `retryOn` returns false.
 *   Non-Error rejections are wrapped in an `Error` built from `String(value)`.
 */
export async function withRetry(fn, options = {}) {
    const { maxRetries = 3, baseDelayMs = 1000, maxDelayMs = 30_000, jitter = true, retryOn = (err) => isRetryable(err), onRetry, label = 'operation', } = options;
    // Normalise the budget so a bad value can never prevent the first attempt.
    const retryLimit = Math.max(0, Math.floor(Number(maxRetries) || 0));
    let delay = baseDelayMs;
    // The loop only ends through `return` (success) or `throw` (give up).
    for (let attempt = 0;; attempt++) {
        try {
            const result = await fn();
            // If we succeeded after retries, log it
            if (attempt > 0) {
                log.info(`${label} succeeded after ${attempt} retries`);
            }
            return result;
        }
        catch (err) {
            const lastError = err instanceof Error ? err : new Error(String(err));
            // Budget exhausted or non-retryable — surface the error immediately
            if (attempt >= retryLimit || !retryOn(lastError, attempt)) {
                throw lastError;
            }
            // Jitter spreads retries out so many clients do not retry in lock-step
            const jitterFactor = jitter ? (0.75 + Math.random() * 0.5) : 1;
            const actualDelay = Math.min(delay * jitterFactor, maxDelayMs);
            if (onRetry) {
                onRetry(lastError, attempt + 1, actualDelay);
            }
            log.info(`${label} attempt ${attempt + 1} failed: ${lastError.message}. Retrying in ${Math.round(actualDelay)}ms`);
            await new Promise((resolve) => setTimeout(resolve, actualDelay));
            // Exponential increase for next attempt
            delay = Math.min(delay * 2, maxDelayMs);
        }
    }
}
|
|
51
|
+
/**
 * Per-domain rate state tracker.
 * Adapts delay per target domain based on success/failure patterns.
 * Useful for avoiding rate limits on target sites.
 *
 * State per domain is `{ delay, failCount, lastHit }`; it is created the first
 * time `recordResult` sees the domain, so `throttle` never waits for a domain
 * that has not produced a response yet.
 */
export class DomainRateLimiter {
    domains = new Map();
    baseDelay;
    maxDelay;
    rateLimitCodes;
    /**
     * @param options.baseDelay Minimum delay (ms) that backoff starts from (default 1000).
     * @param options.maxDelay Cap for the per-domain delay in ms (default 60000).
     * @param options.rateLimitCodes Status codes treated as rate limiting (default [429, 503]).
     */
    constructor(options = {}) {
        this.baseDelay = options.baseDelay ?? 1000;
        this.maxDelay = options.maxDelay ?? 60_000;
        this.rateLimitCodes = options.rateLimitCodes ?? [429, 503];
    }
    /** Get the hostname from a URL; unparseable input is used as its own key. */
    getDomain(url) {
        try {
            return new URL(url).hostname;
        }
        catch {
            return url;
        }
    }
    /**
     * Wait if needed before making a request to this domain.
     *
     * The release time is reserved in `lastHit` *before* awaiting, so callers
     * that arrive concurrently are spaced `delay` apart instead of all waking
     * at the same instant.
     */
    async throttle(url) {
        const state = this.domains.get(this.getDomain(url));
        if (!state)
            return;
        const now = Date.now();
        const releaseAt = Math.max(now, state.lastHit + state.delay);
        state.lastHit = releaseAt;
        const waitTime = releaseAt - now;
        if (waitTime > 0) {
            await new Promise((resolve) => setTimeout(resolve, waitTime));
        }
    }
    /** Record a response status for adaptive delay */
    recordResult(url, statusCode) {
        const domain = this.getDomain(url);
        let state = this.domains.get(domain);
        if (!state) {
            state = { delay: 0, failCount: 0, lastHit: Date.now() };
            this.domains.set(domain, state);
        }
        if (this.rateLimitCodes.includes(statusCode)) {
            state.failCount++;
            // Exponential backoff with ±25% jitter. Start from at least
            // baseDelay: a delay that has decayed to a few ms must not make the
            // next rate-limit response produce a negligible backoff.
            const jitter = 0.75 + Math.random() * 0.5;
            state.delay = Math.min(Math.max(state.delay, this.baseDelay) * 2 * jitter, this.maxDelay);
            log.warn(`Domain ${domain} rate limited (${statusCode}). Delay: ${Math.round(state.delay)}ms`);
        }
        else {
            // Gradual recovery on success; sub-millisecond remainders are
            // meaningless for timers, so they collapse to "no delay".
            const decayed = state.delay * 0.75;
            state.delay = decayed < 1 ? 0 : decayed;
            state.failCount = 0;
        }
    }
    /** Get current state for diagnostics (delay rounded to whole ms). */
    getStats() {
        const stats = {};
        for (const [domain, state] of this.domains) {
            stats[domain] = { delay: Math.round(state.delay), failCount: state.failCount };
        }
        return stats;
    }
}
/** Singleton domain rate limiter for the fetch layer */
export const domainLimiter = new DomainRateLimiter();
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain safety check using Google Safe Browsing Lookup API v4.
|
|
3
|
+
* Free: 10,000 lookups/day.
|
|
4
|
+
* Falls back to a local blocklist when no API key is configured.
|
|
5
|
+
* Also checks community threat feeds (URLhaus, PhishTank, OpenPhish) for known threats.
|
|
6
|
+
*/
|
|
7
|
+
import { type ThreatFeedResult } from './threat-feeds.js';
|
|
8
|
+
export type { ThreatFeedResult };
|
|
9
|
+
export interface SafeBrowsingResult {
    /** False when at least one source flagged the URL. */
    safe: boolean;
    /** Identifiers of the threats that were detected; empty when none. */
    threats: string[];
    /** Which check produced the base verdict. */
    source: 'google-api' | 'local-blocklist' | 'unchecked';
    /** Result of the community threat feed lookup (URLhaus, PhishTank, OpenPhish). */
    threatFeeds?: ThreatFeedResult;
}
|
|
16
|
+
/**
 * Check whether a URL is safe to visit.
 *
 * How the verdict is produced:
 *   1. With an API key (argument or SAFE_BROWSING_API_KEY), the Google Safe
 *      Browsing API is raced against a 2s timeout; on timeout or error the
 *      local blocklist result is used instead.
 *   2. Community threat feeds (URLhaus, PhishTank, OpenPhish) are queried in parallel.
 *   3. Without an API key, only the local heuristic blocklist provides the base verdict.
 *
 * The result is `safe: false` as soon as any consulted source flags the URL.
 *
 * @param url     The URL to check
 * @param apiKey  Google Safe Browsing API key (optional). Falls back to SAFE_BROWSING_API_KEY env var.
 */
export declare function checkUrlSafety(url: string, apiKey?: string): Promise<SafeBrowsingResult>;
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain safety check using Google Safe Browsing Lookup API v4.
|
|
3
|
+
* Free: 10,000 lookups/day.
|
|
4
|
+
* Falls back to a local blocklist when no API key is configured.
|
|
5
|
+
* Also checks community threat feeds (URLhaus, PhishTank, OpenPhish) for known threats.
|
|
6
|
+
*/
|
|
7
|
+
import { checkThreatFeeds } from './threat-feeds.js';
|
|
8
|
+
// Known brands commonly impersonated in phishing
const KNOWN_BRANDS = [
    'amazon', 'google', 'facebook', 'apple', 'microsoft', 'paypal', 'netflix',
    'instagram', 'twitter', 'linkedin', 'dropbox', 'chase', 'wellsfargo', 'bankofamerica',
    'citibank', 'hsbc', 'ebay', 'walmart', 'target', 'bestbuy', 'fedex', 'ups', 'usps',
    'irs', 'dmv', 'gov', 'yahoo', 'outlook', 'hotmail',
];
// TLDs heavily abused for phishing/malware (free-domain registrars)
const SUSPICIOUS_TLDS = new Set(['.tk', '.ml', '.ga', '.cf', '.gq', '.top', '.click', '.loan', '.win', '.xyz', '.club', '.work']);
// Private/reserved IP ranges (safe for local dev).
// Patterns are tested against a bare address (IPv6 without brackets).
const PRIVATE_IP_RANGES = [
    /^127\.\d+\.\d+\.\d+$/, // loopback
    /^10\.\d+\.\d+\.\d+$/, // RFC 1918
    /^192\.168\.\d+\.\d+$/, // RFC 1918
    /^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/, // RFC 1918
    /^169\.254\.\d+\.\d+$/, // link-local
    /^::1$/, // IPv6 loopback
    /^fc[0-9a-f]{2}:/i, // IPv6 unique local, fc00::/8 half of fc00::/7
    /^fd[0-9a-f]{2}:/i, // IPv6 unique local, fd00::/8 half of fc00::/7
    /^fe[89ab][0-9a-f]:/i, // IPv6 link-local fe80::/10 (counterpart of 169.254/16)
];
|
|
28
|
+
/** Returns true when `host` matches any pattern in PRIVATE_IP_RANGES. */
function isPrivateIp(host) {
    for (const pattern of PRIVATE_IP_RANGES) {
        if (pattern.test(host)) {
            return true;
        }
    }
    return false;
}
|
|
31
|
+
/**
 * Returns true when `host` looks like an IP literal.
 *
 * IPv4: four dot-separated groups of 1-3 digits.
 * IPv6: bare or bracketed, made of hex digits, colons and (for embedded IPv4)
 * dots, and containing at least one colon. The colon requirement keeps
 * hex-only hostnames such as "cafe" or "beef" from being treated as IPv6.
 */
function isIpAddress(host) {
    // IPv4
    if (/^\d{1,3}(\.\d{1,3}){3}$/.test(host))
        return true;
    // IPv6 (bare or bracketed)
    const bare = host.replace(/^\[|\]$/g, '');
    return bare.includes(':') && /^[0-9a-f:.]+$/i.test(bare);
}
|
|
40
|
+
/**
|
|
41
|
+
* Local heuristic blocklist — catches common attack patterns without an API key.
|
|
42
|
+
*/
|
|
43
|
+
function checkLocalBlocklist(url) {
|
|
44
|
+
const threats = [];
|
|
45
|
+
// 1. Data URIs — always suspicious
|
|
46
|
+
if (/^data:/i.test(url.trim())) {
|
|
47
|
+
threats.push('DATA_URI');
|
|
48
|
+
return { safe: false, threats, source: 'local-blocklist' };
|
|
49
|
+
}
|
|
50
|
+
let parsed = null;
|
|
51
|
+
try {
|
|
52
|
+
parsed = new URL(url);
|
|
53
|
+
}
|
|
54
|
+
catch {
|
|
55
|
+
// Unparseable URL — flag as suspicious
|
|
56
|
+
threats.push('INVALID_URL');
|
|
57
|
+
return { safe: false, threats, source: 'local-blocklist' };
|
|
58
|
+
}
|
|
59
|
+
const { hostname, username, password } = parsed;
|
|
60
|
+
// 2. @ sign trick: http://google.com@evil.com/login → username = 'google.com'
|
|
61
|
+
if (username || password) {
|
|
62
|
+
threats.push('URL_CREDENTIALS_TRICK');
|
|
63
|
+
return { safe: false, threats, source: 'local-blocklist' };
|
|
64
|
+
}
|
|
65
|
+
// 3. Punycode homograph attacks (xn-- internationalized domains)
|
|
66
|
+
if (/\bxn--/i.test(hostname)) {
|
|
67
|
+
// Allow legitimate IDN TLDs (e.g. .xn--p1ai = .рф)
|
|
68
|
+
const parts = hostname.split('.');
|
|
69
|
+
const hasPunycodeLabel = parts.slice(0, -1).some((p) => /^xn--/i.test(p));
|
|
70
|
+
if (hasPunycodeLabel) {
|
|
71
|
+
threats.push('PUNYCODE_HOMOGRAPH');
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
// 4. IP-only URLs pointing to non-private ranges
|
|
75
|
+
if (isIpAddress(hostname)) {
|
|
76
|
+
const bare = hostname.replace(/^\[|\]$/g, ''); // strip brackets from IPv6
|
|
77
|
+
if (!isPrivateIp(bare)) {
|
|
78
|
+
threats.push('SUSPICIOUS_IP');
|
|
79
|
+
}
|
|
80
|
+
if (threats.length > 0)
|
|
81
|
+
return { safe: false, threats, source: 'local-blocklist' };
|
|
82
|
+
return { safe: true, threats: [], source: 'local-blocklist' };
|
|
83
|
+
}
|
|
84
|
+
const lowerHost = hostname.toLowerCase();
|
|
85
|
+
// Remove www prefix for analysis
|
|
86
|
+
const hostNoWww = lowerHost.replace(/^www\./, '');
|
|
87
|
+
const parts = hostNoWww.split('.');
|
|
88
|
+
const tld = parts.length >= 2 ? '.' + parts[parts.length - 1] : '';
|
|
89
|
+
const sld = parts.length >= 2 ? parts[parts.length - 2] : '';
|
|
90
|
+
// 5. Known-bad TLDs combined with brand names (amazon-login.tk)
|
|
91
|
+
if (SUSPICIOUS_TLDS.has(tld)) {
|
|
92
|
+
const containsBrand = KNOWN_BRANDS.some((brand) => hostNoWww.includes(brand));
|
|
93
|
+
if (containsBrand) {
|
|
94
|
+
threats.push('PHISHING');
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
// 6. Excessive hyphens in SLD (amaz0n-login-verify-account.com)
|
|
98
|
+
const hyphenCount = (sld.match(/-/g) || []).length;
|
|
99
|
+
if (hyphenCount >= 3) {
|
|
100
|
+
threats.push('EXCESSIVE_HYPHENS');
|
|
101
|
+
}
|
|
102
|
+
// 7. Brand name in subdomain combined with suspicious TLD
|
|
103
|
+
if (SUSPICIOUS_TLDS.has(tld)) {
|
|
104
|
+
const subdomains = parts.slice(0, -2).join('.');
|
|
105
|
+
const subHasBrand = KNOWN_BRANDS.some((brand) => subdomains.includes(brand));
|
|
106
|
+
if (subHasBrand && !threats.includes('PHISHING')) {
|
|
107
|
+
threats.push('PHISHING');
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
// 8. Excessive subdomains: login.secure.verify.account.bank.xyz.com
|
|
111
|
+
if (parts.length > 5) {
|
|
112
|
+
threats.push('EXCESSIVE_SUBDOMAINS');
|
|
113
|
+
}
|
|
114
|
+
if (threats.length > 0) {
|
|
115
|
+
return { safe: false, threats, source: 'local-blocklist' };
|
|
116
|
+
}
|
|
117
|
+
return { safe: true, threats: [], source: 'local-blocklist' };
|
|
118
|
+
}
|
|
119
|
+
/**
 * Query the Google Safe Browsing Lookup API v4 for a single URL.
 *
 * The request is aborted after 2 seconds. Any failure (abort, network error,
 * non-2xx status, unreadable body) yields `null` so the caller can fall back.
 *
 * @param url URL to look up.
 * @param apiKey API key sent as the `key` query parameter.
 * @returns `{ safe, threats, source: 'google-api' }`, or `null` on failure.
 */
async function checkGoogleSafeBrowsing(url, apiKey) {
    const requestUrl = `https://safebrowsing.googleapis.com/v4/threatMatches:find?key=${encodeURIComponent(apiKey)}`;
    const payload = {
        client: { clientId: 'webpeel', clientVersion: '1.0.0' },
        threatInfo: {
            threatTypes: ['MALWARE', 'SOCIAL_ENGINEERING', 'UNWANTED_SOFTWARE', 'POTENTIALLY_HARMFUL_APPLICATION'],
            platformTypes: ['ANY_PLATFORM'],
            threatEntryTypes: ['URL'],
            threatEntries: [{ url }],
        },
    };
    const aborter = new AbortController();
    const abortTimer = setTimeout(() => aborter.abort(), 2000);
    try {
        const reply = await fetch(requestUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload),
            signal: aborter.signal,
        });
        // Headers arrived in time; the abort timer is no longer needed.
        clearTimeout(abortTimer);
        if (!reply.ok) {
            return null;
        }
        const parsed = await reply.json();
        const hits = parsed.matches;
        if (!hits || hits.length === 0) {
            return { safe: true, threats: [], source: 'google-api' };
        }
        // De-duplicate threat types while keeping first-seen order.
        const distinctTypes = Array.from(new Set(hits.map((hit) => hit.threatType)));
        return { safe: false, threats: distinctTypes, source: 'google-api' };
    }
    catch {
        clearTimeout(abortTimer);
        return null;
    }
}
|
|
158
|
+
/**
 * Check URL safety.
 *
 * Flow:
 *   1. If SAFE_BROWSING_API_KEY (or passed apiKey) is set, race Google API vs 2s timeout.
 *      Falls back to local blocklist on timeout or error.
 *   2. Run community threat feeds (URLhaus, PhishTank, OpenPhish) in parallel.
 *   3. Without an API key, use local heuristic blocklist only.
 *
 * The base verdict (Google or local blocklist) and the threat feed verdict are
 * merged: the URL is unsafe if either says so, and threat names are de-duplicated.
 *
 * @param url     The URL to check
 * @param apiKey  Google Safe Browsing API key (optional). Falls back to SAFE_BROWSING_API_KEY env var.
 * @returns `{ safe, threats, source, threatFeeds }` where `source` names the
 *   check that produced the base verdict.
 */
export async function checkUrlSafety(url, apiKey) {
    const key = apiKey ?? process.env.SAFE_BROWSING_API_KEY;
    // Base verdict: Google when a key is available and it answers in time,
    // otherwise the local heuristics.
    const resolveBaseResult = async () => {
        if (!key) {
            // No API key — local blocklist only
            return checkLocalBlocklist(url);
        }
        let deadlineTimer;
        const deadline = new Promise((resolve) => {
            deadlineTimer = setTimeout(() => resolve(null), 2000);
        });
        try {
            const googleResult = await Promise.race([checkGoogleSafeBrowsing(url, key), deadline]);
            if (googleResult !== null)
                return googleResult;
            // API timed out or errored — use local blocklist result
            return checkLocalBlocklist(url);
        }
        finally {
            // Always release the timer; a pending timeout would otherwise keep
            // the process alive for up to 2s after the check has finished.
            clearTimeout(deadlineTimer);
        }
    };
    // Run Google Safe Browsing + community threat feeds in parallel
    const [baseResult, threatFeedsResult] = await Promise.all([
        resolveBaseResult(),
        checkThreatFeeds(url),
    ]);
    // Merge: if threat feeds found threats, combine into final result
    const allThreats = [
        ...baseResult.threats,
        ...threatFeedsResult.threats,
    ];
    const isUnsafe = !baseResult.safe || !threatFeedsResult.safe;
    return {
        safe: !isUnsafe,
        threats: [...new Set(allThreats)],
        source: baseResult.source,
        threatFeeds: threatFeedsResult,
    };
}
|