@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Active domain verification — runtime TLS, HTTP header, and DNS signals.
|
|
3
|
+
*
|
|
4
|
+
* Runs during the fetch pipeline for sites that are NOT already in the known
|
|
5
|
+
* official/established lists. All network operations have a hard 3-second
|
|
6
|
+
* timeout and fail-open (any error → null for that section).
|
|
7
|
+
*
|
|
8
|
+
* Scoring adds bonus points (0–80) on top of the static source-credibility score.
|
|
9
|
+
*/
|
|
10
|
+
import tls from 'tls';
|
|
11
|
+
import dns from 'dns/promises';
|
|
12
|
+
import https from 'https';
|
|
13
|
+
import http from 'http';
|
|
14
|
+
import { URL } from 'url';
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Known CA issuers → normalised label
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
 * Lookup table of `[substring, label]` pairs for certificate issuers.
 *
 * `normaliseCaIssuer` scans this list top to bottom with a case-sensitive
 * substring test and returns the label of the first hit, so entry order is
 * significant and must be preserved.
 *
 * @type {Array<[string, string]>}
 */
const CA_LABELS = [
    ["Let's Encrypt", "Let's Encrypt"],
    ['ISRG', "Let's Encrypt"],
    ['DigiCert', 'DigiCert'],
    ['Comodo', 'Comodo'],
    ['Sectigo', 'Sectigo'],
    ['GlobalSign', 'GlobalSign'],
    ['GeoTrust', 'GeoTrust'],
    ['Thawte', 'Thawte'],
    ['Entrust', 'Entrust'],
    ['Amazon', 'Amazon Trust Services'],
    ['Google Trust Services', 'Google Trust Services'],
    ['Google', 'Google Trust Services'],
    ['Microsoft', 'Microsoft RSA TLS CA'],
    ['Cloudflare', 'Cloudflare'],
    ['ZeroSSL', 'ZeroSSL'],
    ['Buypass', 'Buypass'],
    ['SSL.com', 'SSL.com'],
];
|
|
37
|
+
// Known CDN / cloud providers detected from Server header
|
|
38
|
+
/**
 * Lookup table of `[pattern, label]` pairs applied to the raw `Server`
 * response header.
 *
 * `detectServer` tests the patterns in order and returns the label of the
 * first one that matches, so entry order is significant. All patterns are
 * case-insensitive and carry no `g` flag.
 *
 * @type {Array<[RegExp, string]>}
 */
const CDN_LABELS = [
    [/cloudflare/i, 'Cloudflare'],
    [/vercel/i, 'Vercel'],
    [/netlify/i, 'Netlify'],
    [/awselb|amazon/i, 'AWS'],
    [/nginx/i, 'nginx'],
    [/apache/i, 'Apache'],
    [/gws|google/i, 'Google'],
    [/microsoft/i, 'Microsoft'],
    [/fastly/i, 'Fastly'],
    [/akamai/i, 'Akamai'],
    [/litespeed/i, 'LiteSpeed'],
    [/openresty/i, 'OpenResty'],
    [/caddy/i, 'Caddy'],
];
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Helpers
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
function withTimeout(promise, ms, fallback) {
|
|
57
|
+
return Promise.race([
|
|
58
|
+
promise,
|
|
59
|
+
new Promise((resolve) => setTimeout(() => resolve(fallback), ms)),
|
|
60
|
+
]);
|
|
61
|
+
}
|
|
62
|
+
/**
 * Maps a raw certificate issuer string to a normalised CA label.
 *
 * The first `CA_LABELS` entry whose substring occurs in `raw` decides the
 * label. When nothing matches, `raw` itself is returned, or `'Unknown CA'`
 * if `raw` is empty.
 *
 * @param raw Issuer text taken from the peer certificate.
 * @returns The normalised label, the raw text, or `'Unknown CA'`.
 */
function normaliseCaIssuer(raw) {
    const match = CA_LABELS.find(([needle]) => raw.includes(needle));
    if (match) {
        return match[1];
    }
    return raw || 'Unknown CA';
}
|
|
69
|
+
/**
 * Classifies a raw `Server` header value as a known CDN / server label.
 *
 * The first `CDN_LABELS` pattern that matches `raw` decides the label. When
 * nothing matches, the trimmed header is returned, or `'unknown'` if that is
 * empty.
 *
 * @param raw Raw `Server` header value (may be an empty string).
 * @returns The detected label, the trimmed raw value, or `'unknown'`.
 */
function detectServer(raw) {
    const hit = CDN_LABELS.find(([matcher]) => matcher.test(raw));
    return hit ? hit[1] : (raw.trim() || 'unknown');
}
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// TLS check — connect to port 443, inspect peer cert
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
/**
 * Opens a TLS connection to `host` on port 443 and summarises the peer
 * certificate.
 *
 * Resolves to `{ valid, issuer, daysRemaining, ev }`, or to `null` when the
 * connection errors, the socket times out, no certificate with a `valid_to`
 * date is presented, or the 3-second budget passed to `withTimeout` elapses.
 * The inner promise only ever resolves; failures are reported as `null`.
 *
 * @param host Hostname to connect to; also sent as the SNI server name.
 * @returns A promise for the certificate summary or `null`.
 */
async function checkTls(host) {
    // Node reports a repeated DN attribute (e.g. two O= entries) as an array of
    // strings. Flatten it to one string so the substring checks below neither
    // throw nor receive a non-string value.
    const dnText = (field) => (Array.isArray(field) ? field.join(', ') : field || '');
    return withTimeout(new Promise((resolve) => {
        let settled = false;
        // Several events (secureConnect / error / timeout) can fire; only the first counts.
        const settle = (v) => {
            if (!settled) {
                settled = true;
                resolve(v);
            }
        };
        try {
            const socket = tls.connect({
                host,
                port: 443,
                servername: host,
                rejectUnauthorized: false, // we check validity manually
                timeout: 3000,
            });
            socket.on('secureConnect', () => {
                try {
                    const cert = socket.getPeerCertificate(true);
                    socket.destroy();
                    if (!cert || !cert.valid_to) {
                        settle(null);
                        return;
                    }
                    const validTo = new Date(cert.valid_to);
                    const now = new Date();
                    const daysRemaining = Math.floor((validTo.getTime() - now.getTime()) / (1000 * 60 * 60 * 24));
                    // NOTE(review): an unexpired certificate counts as valid even when
                    // chain/hostname verification failed (authorized === false) —
                    // confirm this leniency is intended.
                    const valid = socket.authorized !== false || daysRemaining > 0;
                    // Issuer from either issuer.O or issuer.CN
                    const issuerRaw = dnText(cert.issuer?.O) || dnText(cert.issuer?.CN);
                    const issuer = normaliseCaIssuer(issuerRaw);
                    // Heuristic: treat any subject organisation not containing "unknown" as EV.
                    // NOTE(review): OV certificates also carry subject.O — confirm this is acceptable.
                    const subjectO = dnText(cert.subject?.O);
                    const ev = subjectO.length > 0 && !subjectO.toLowerCase().includes('unknown');
                    settle({ valid, issuer, daysRemaining, ev });
                }
                catch {
                    socket.destroy();
                    settle(null);
                }
            });
            socket.on('error', () => settle(null));
            socket.on('timeout', () => {
                socket.destroy();
                settle(null);
            });
        }
        catch {
            settle(null);
        }
    }), 3000, null);
}
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
// Header check — HEAD request to collect response headers
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
/**
 * Issues a HEAD request to `url` and reports which security-related response
 * headers are present.
 *
 * Resolves to `{ hsts, csp, xFrameOptions, server, poweredBy }`. On a request
 * error, a socket timeout, an unparsable URL, or when the 3-second budget
 * passed to `withTimeout` elapses, it resolves to a fallback object with all
 * flags `false`, `server: 'unknown'` and `poweredBy: null`.
 *
 * @param url Absolute http(s) URL to probe.
 * @returns A promise for the header summary (or the fallback object).
 */
async function checkHeaders(url) {
    const fallback = {
        hsts: false,
        csp: false,
        xFrameOptions: false,
        server: 'unknown',
        poweredBy: null,
    };
    return withTimeout(new Promise((resolve) => {
        let settled = false;
        // Response, error and timeout can all fire; only the first outcome is kept.
        const settle = (v) => {
            if (!settled) {
                settled = true;
                resolve(v);
            }
        };
        try {
            const parsedUrl = new URL(url);
            const isHttps = parsedUrl.protocol === 'https:';
            const requester = isHttps ? https : http;
            const req = requester.request({
                hostname: parsedUrl.hostname,
                path: parsedUrl.pathname + parsedUrl.search,
                port: parsedUrl.port || (isHttps ? 443 : 80),
                method: 'HEAD',
                headers: {
                    'User-Agent': 'Mozilla/5.0 (compatible; WebPeel/1.0; +https://webpeel.dev)',
                    Accept: 'text/html,*/*',
                },
                timeout: 3000,
                // Certificate problems must not block header collection.
                rejectUnauthorized: false,
            }, (res) => {
                // Node requires a response with a handler attached to be consumed;
                // draining it lets the stream end and the socket be released.
                res.resume();
                const h = res.headers;
                const serverRaw = (h['server'] || '');
                const poweredBy = (h['x-powered-by'] || null);
                settle({
                    hsts: Boolean(h['strict-transport-security']),
                    csp: Boolean(h['content-security-policy']),
                    xFrameOptions: Boolean(h['x-frame-options']),
                    server: detectServer(serverRaw),
                    poweredBy,
                });
            });
            req.on('error', () => settle(fallback));
            req.on('timeout', () => {
                req.destroy();
                settle(fallback);
            });
            req.end();
        }
        catch {
            settle(fallback);
        }
    }), 3000, fallback);
}
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
// DNS check — MX, TXT (SPF/DMARC), NS
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
/**
 * Look up the MX, TXT, `_dmarc` TXT and NS records for `domain` and condense
 * them into a small summary.
 *
 * Each individual lookup failure is swallowed and treated as "no records", so
 * one missing record type does not fail the whole check.
 *
 * @param domain Host name to query (no scheme, no path).
 * @returns A promise for `{ hasMx, hasDmarc, hasSpf, nameservers }`, where
 *          `nameservers` is a de-duplicated list in which hosts of recognised
 *          providers are replaced by the provider label.
 */
async function checkDns(domain) {
    // Substring → provider label. Order matters: the first matching entry wins.
    const providerLabels = [
        ['cloudflare', 'Cloudflare'],
        ['amazonaws', 'AWS'],
        ['awsdns', 'AWS'],
        ['googledomains', 'Google'],
        ['google', 'Google'],
        ['azure', 'Azure'],
        ['microsoft', 'Azure'],
        ['namecheap', 'Namecheap'],
        ['godaddy', 'GoDaddy'],
        ['digitalocean', 'DigitalOcean'],
        ['vercel', 'Vercel'],
        ['netlify', 'Netlify'],
    ];
    // resolveTxt yields one array of string chunks per record. A long record is
    // split into several chunks that must be concatenated before inspection;
    // flattening would treat every chunk as if it were its own record.
    const joinRecords = (records) => records.map((chunks) => chunks.join('').toLowerCase());
    // NOTE(review): withTimeout is defined elsewhere in this file; presumably it
    // resolves to null if the lookups take longer than 3000 ms.
    return withTimeout(Promise.all([
        dns.resolveMx(domain).catch(() => []),
        dns.resolveTxt(domain).catch(() => []),
        dns.resolveTxt(`_dmarc.${domain}`).catch(() => []),
        dns.resolveNs(domain).catch(() => []),
    ]).then(([mx, txt, dmarcTxt, ns]) => {
        const hasSpf = joinRecords(txt).some((record) => record.startsWith('v=spf1'));
        const hasDmarc = joinRecords(dmarcTxt).some((record) => record.startsWith('v=dmarc1'));
        // Normalise nameserver labels; unrecognised hosts are kept verbatim.
        const nameservers = ns.map((n) => {
            const lower = n.toLowerCase();
            const match = providerLabels.find(([needle]) => lower.includes(needle));
            return match ? match[1] : n;
        });
        return {
            hasMx: mx.length > 0,
            hasDmarc,
            hasSpf,
            nameservers: [...new Set(nameservers)],
        };
    }), 3000, null);
}
|
|
234
|
+
// ---------------------------------------------------------------------------
|
|
235
|
+
// Scoring
|
|
236
|
+
// ---------------------------------------------------------------------------
|
|
237
|
+
function computeScore(tlsResult, headersResult, dnsResult, signals, warnings) {
|
|
238
|
+
let score = 0;
|
|
239
|
+
// TLS
|
|
240
|
+
if (tlsResult) {
|
|
241
|
+
if (tlsResult.valid) {
|
|
242
|
+
score += 15;
|
|
243
|
+
signals.push(`Valid TLS cert (${tlsResult.issuer}, ${tlsResult.daysRemaining} days remaining)`);
|
|
244
|
+
const knownCas = ['DigiCert', 'Comodo', 'GlobalSign', 'GeoTrust', 'Entrust', 'Sectigo', 'Google Trust Services', 'Amazon Trust Services'];
|
|
245
|
+
if (knownCas.includes(tlsResult.issuer)) {
|
|
246
|
+
score += 5;
|
|
247
|
+
signals.push(`Trusted CA (${tlsResult.issuer})`);
|
|
248
|
+
}
|
|
249
|
+
if (tlsResult.ev) {
|
|
250
|
+
score += 10;
|
|
251
|
+
signals.push('Extended Validation (EV) certificate');
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
else {
|
|
255
|
+
warnings.push('Invalid or expired TLS certificate');
|
|
256
|
+
}
|
|
257
|
+
if (tlsResult.daysRemaining < 14) {
|
|
258
|
+
warnings.push(`TLS certificate expires soon (${tlsResult.daysRemaining} days)`);
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
else {
|
|
262
|
+
warnings.push('TLS check unavailable or failed');
|
|
263
|
+
}
|
|
264
|
+
// Headers
|
|
265
|
+
if (headersResult.hsts) {
|
|
266
|
+
score += 10;
|
|
267
|
+
signals.push('HSTS (HTTP Strict Transport Security) enabled');
|
|
268
|
+
}
|
|
269
|
+
else {
|
|
270
|
+
warnings.push('No HSTS header');
|
|
271
|
+
}
|
|
272
|
+
if (headersResult.csp) {
|
|
273
|
+
score += 5;
|
|
274
|
+
signals.push('Content-Security-Policy header present');
|
|
275
|
+
}
|
|
276
|
+
const knownCdns = ['Cloudflare', 'Vercel', 'Netlify', 'AWS', 'Fastly', 'Akamai'];
|
|
277
|
+
if (knownCdns.includes(headersResult.server)) {
|
|
278
|
+
score += 10;
|
|
279
|
+
signals.push(`HTTPS via ${headersResult.server}`);
|
|
280
|
+
}
|
|
281
|
+
// DNS
|
|
282
|
+
if (dnsResult) {
|
|
283
|
+
if (dnsResult.hasMx) {
|
|
284
|
+
score += 10;
|
|
285
|
+
signals.push('Mail exchange (MX) records present — real organisation');
|
|
286
|
+
}
|
|
287
|
+
else {
|
|
288
|
+
warnings.push('No MX records — may not be a real organisation');
|
|
289
|
+
}
|
|
290
|
+
if (dnsResult.hasDmarc) {
|
|
291
|
+
score += 10;
|
|
292
|
+
signals.push('DMARC policy configured (email authentication)');
|
|
293
|
+
}
|
|
294
|
+
else {
|
|
295
|
+
warnings.push('No DMARC policy');
|
|
296
|
+
}
|
|
297
|
+
if (dnsResult.hasSpf) {
|
|
298
|
+
score += 5;
|
|
299
|
+
signals.push('SPF record present (email authentication)');
|
|
300
|
+
}
|
|
301
|
+
if (dnsResult.nameservers.length > 0) {
|
|
302
|
+
const knownNs = ['Cloudflare', 'AWS', 'Google', 'Azure', 'Vercel', 'Netlify'];
|
|
303
|
+
const knownFound = dnsResult.nameservers.filter((ns) => knownNs.includes(ns));
|
|
304
|
+
if (knownFound.length > 0) {
|
|
305
|
+
signals.push(`Hosted on ${knownFound.join(', ')} nameservers`);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
else {
|
|
310
|
+
warnings.push('DNS check failed');
|
|
311
|
+
}
|
|
312
|
+
return Math.max(0, Math.min(100, score));
|
|
313
|
+
}
|
|
314
|
+
// ---------------------------------------------------------------------------
|
|
315
|
+
// Main export
|
|
316
|
+
// ---------------------------------------------------------------------------
|
|
317
|
+
/**
 * Actively verify a domain by combining a TLS probe, an HTTP header probe and
 * DNS lookups, then scoring the combined result.
 *
 * The three probes are started together and awaited as a group. The TLS probe
 * is skipped (treated as null) for non-https URLs, and the header probe is
 * replaced by `existingHeaders` when the caller supplies them.
 *
 * @param url Full URL to verify (e.g. "https://stripe.com")
 * @param existingHeaders Optional pre-fetched HTTP response headers (avoids a HEAD request)
 * @returns `{ tls, headers, dns, signals, warnings, verificationScore }`. An
 *          unparseable `url` yields an all-empty result with score 0 and a
 *          single "Invalid URL" warning.
 */
export async function verifyDomain(url, existingHeaders) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        // Nothing can be probed without a parseable URL.
        return {
            tls: null,
            headers: { hsts: false, csp: false, xFrameOptions: false, server: 'unknown', poweredBy: null },
            dns: null,
            signals: [],
            warnings: ['Invalid URL — cannot verify'],
            verificationScore: 0,
        };
    }
    const usesHttps = target.protocol === 'https:';
    // DNS is queried with a leading "www." stripped; TLS uses the host exactly as given.
    const dnsName = target.hostname.replace(/^www\./, '');
    // Kick off all three probes before awaiting any of them.
    const tlsProbe = usesHttps ? checkTls(target.hostname) : Promise.resolve(null);
    const headerProbe = existingHeaders
        ? Promise.resolve(buildHeadersFromExisting(existingHeaders))
        : checkHeaders(url);
    const dnsProbe = checkDns(dnsName);
    const [tlsInfo, headerInfo, dnsInfo] = await Promise.all([tlsProbe, headerProbe, dnsProbe]);
    const signals = [];
    const warnings = usesHttps ? [] : ['Site does not use HTTPS'];
    // computeScore appends to signals/warnings as a side effect.
    const verificationScore = computeScore(tlsInfo, headerInfo, dnsInfo, signals, warnings);
    return {
        tls: tlsInfo,
        headers: headerInfo,
        dns: dnsInfo,
        signals,
        warnings,
        verificationScore,
    };
}
|
|
363
|
+
/**
 * Derive the header summary (`hsts`, `csp`, `xFrameOptions`, `server`,
 * `poweredBy`) from response headers that were already fetched elsewhere
 * (e.g. from the pipeline's fetchResult).
 *
 * Header names are matched case-insensitively by first copying every entry
 * into a lookup keyed by the lower-cased name.
 *
 * @param headers Plain object mapping header names to values.
 * @returns Summary object in the same shape the HEAD-request probe produces.
 */
function buildHeadersFromExisting(headers) {
    const byLowerName = {};
    Object.entries(headers).forEach(([name, value]) => {
        byLowerName[name.toLowerCase()] = value;
    });
    const isPresent = (name) => Boolean(byLowerName[name]);
    return {
        hsts: isPresent('strict-transport-security'),
        csp: isPresent('content-security-policy'),
        xFrameOptions: isPresent('x-frame-options'),
        server: detectServer(byLowerName['server'] || ''),
        poweredBy: byLowerName['x-powered-by'] || null,
    };
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine Quality-Ranked Fallback System
|
|
3
|
+
*
|
|
4
|
+
* Ranks extraction strategies by quality for a given URL, producing a
|
|
5
|
+
* dynamic fallback chain. Inspired by Firecrawl's engine cascade approach
|
|
6
|
+
* but tailored to WebPeel's architecture.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* ```ts
|
|
10
|
+
* import { buildFallbackChain } from './engine-ranker.js';
|
|
11
|
+
* const chain = buildFallbackChain('https://twitter.com/user', { render: true });
|
|
12
|
+
* // Returns engines sorted by quality, with domain-specific adjustments
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* @module engine-ranker
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Identifier of an extraction engine. Members are listed roughly in order of
 * increasing sophistication.
 */
export type EngineType =
    | 'domain-api'
    | 'simple'
    | 'browser'
    | 'stealth'
    | 'cloaked'
    | 'search-fallback';
|
|
19
|
+
/**
 * Describes one extraction engine: how good its output is, how fast and how
 * expensive it is to run, and which capabilities it offers.
 */
export interface EngineConfig {
    /** Which engine this configuration belongs to. */
    type: EngineType;
    /** Extraction fidelity on a 0-100 scale; larger is better. */
    quality: number;
    /** Speed on a 0-100 scale; larger is faster. */
    speed: number;
    /** Resource cost on a 0-100 scale; larger is more resource-intensive. */
    cost: number;
    /** Upper bound, in milliseconds, on a reasonable timeout for this engine. */
    maxTimeoutMs: number;
    /** Capability flags for the engine. */
    features: {
        /** True when the engine can run JavaScript / render SPAs. */
        javascript: boolean;
        /** True when the engine has anti-bot bypass capabilities. */
        antibot: boolean;
        /** True when the engine can capture screenshots. */
        screenshots: boolean;
        /** True when the engine uses stealth techniques (fingerprint spoofing, etc.). */
        stealth: boolean;
    };
}
|
|
46
|
+
/** Switches that decide which engines may appear in a fallback chain. */
export interface FallbackChainOptions {
    /** When false, engines that need browser rendering (browser, stealth, cloaked) are left out. */
    render?: boolean;
    /** When true, stealth-capable engines are kept even if `render` is false. */
    stealth?: boolean;
    /** When true, the domain-api engine is left out of the chain. */
    noDomainApi?: boolean;
}
|
|
55
|
+
/**
 * Looks up the per-engine configuration overrides that apply to a hostname.
 *
 * Known domain patterns are compared by suffix, so the pattern "twitter.com"
 * applies to "twitter.com", "www.twitter.com", "mobile.twitter.com", and so on.
 *
 * @param hostname - Hostname to look up (e.g. "www.twitter.com")
 * @returns Partial config overrides keyed by engine type, or an empty object
 */
export declare function getDomainOverrides(hostname: string): {
    [K in EngineType]?: Partial<EngineConfig>;
};
|
|
66
|
+
/**
 * Produces the ordered list of extraction engines to try for a URL.
 *
 * Construction steps:
 * 1. Begin with the default engine configurations
 * 2. Apply domain-specific quality/score overrides
 * 3. Drop engines ruled out by `options`
 * 4. Sort by quality, highest first; equal quality is ordered by speed, fastest first
 *
 * @param url - Target URL the chain is built for
 * @param options - Controls which engines are eligible
 * @returns Engine entries in order, highest quality first
 *
 * @example
 * ```ts
 * // Basic chain for a static site
 * const chain = buildFallbackChain('https://wikipedia.org/wiki/Test');
 * // → [domain-api, simple, browser, stealth, cloaked, search-fallback]
 *
 * // Chain for a social media URL with rendering
 * const chain = buildFallbackChain('https://twitter.com/user', { render: true });
 * // → [domain-api, stealth, cloaked, browser, simple, search-fallback]
 *
 * // No browser rendering, no domain API
 * const chain = buildFallbackChain('https://example.com', {
 *   render: false,
 *   noDomainApi: true,
 * });
 * // → [simple, search-fallback]
 * ```
 */
export declare function buildFallbackChain(url: string, options?: FallbackChainOptions): {
    engine: EngineType;
    config: EngineConfig;
}[];
|
|
101
|
+
/**
 * Looks up the baseline configuration of one engine, i.e. its values before
 * any domain-specific override is applied.
 *
 * @param type - Engine type to look up
 * @returns A copy of that engine's default EngineConfig
 */
export declare function getEngineDefaults(type: EngineType): EngineConfig;
|
|
109
|
+
/**
 * Lists every available engine type.
 *
 * @returns Array of all engine identifiers
 */
export declare function getAvailableEngines(): EngineType[];
|