@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine Quality-Ranked Fallback System
|
|
3
|
+
*
|
|
4
|
+
* Ranks extraction strategies by quality for a given URL, producing a
|
|
5
|
+
* dynamic fallback chain. Inspired by Firecrawl's engine cascade approach
|
|
6
|
+
* but tailored to WebPeel's architecture.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* ```ts
|
|
10
|
+
* import { buildFallbackChain } from './engine-ranker.js';
|
|
11
|
+
* const chain = buildFallbackChain('https://twitter.com/user', { render: true });
|
|
12
|
+
* // Returns engines sorted by quality, with domain-specific adjustments
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* @module engine-ranker
|
|
16
|
+
*/
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Default engine configurations
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
/**
 * Baseline profile for every engine type, keyed by engine name.
 *
 * `quality` measures extraction fidelity only; speed and cost are tracked
 * in their own fields:
 *   - domain-api      (95): best — structured data direct from source APIs
 *   - browser         (85): high — full JS rendering captures dynamic content
 *   - stealth         (80): good — same as browser but with anti-bot bypass
 *   - cloaked         (75): decent — heavy stealth, sometimes degrades content
 *   - simple          (70): moderate — plain HTTP fetch, misses JS-rendered content
 *   - search-fallback (40): low — cached/snippet data from search engine caches
 *
 * NOTE(review): quality/speed/cost look like 0–100 scores — confirm against
 * the code that consumes this table.
 */
const ENGINE_DEFAULTS = (() => {
    // Capability presets. Each call builds a fresh object so that no two
    // engines share one `features` instance.
    const httpOnly = () => ({ javascript: false, antibot: false, screenshots: false, stealth: false });
    const rendered = () => ({ javascript: true, antibot: false, screenshots: true, stealth: false });
    const evasive = () => ({ javascript: true, antibot: true, screenshots: true, stealth: true });
    // Columns: type, quality, speed, cost, maxTimeoutMs, features.
    // Row order becomes the key order of the resulting object.
    const rows = [
        ['domain-api', 95, 95, 5, 5000, httpOnly()],
        ['simple', 70, 90, 10, 8000, httpOnly()],
        ['browser', 85, 40, 60, 15000, rendered()],
        ['stealth', 80, 30, 80, 20000, evasive()],
        ['cloaked', 75, 20, 90, 25000, evasive()],
        ['search-fallback', 40, 50, 30, 10000, httpOnly()],
    ];
    return Object.fromEntries(rows.map(([type, quality, speed, cost, maxTimeoutMs, features]) => [
        type,
        { type, quality, speed, cost, maxTimeoutMs, features },
    ]));
})();
|
|
81
|
+
/**
|
|
82
|
+
* Domain rules that adjust engine scores for known site categories.
|
|
83
|
+
*
|
|
84
|
+
* Patterns use suffix matching: "twitter.com" matches both "twitter.com"
|
|
85
|
+
* and "www.twitter.com" but not "nottwitter.com".
|
|
86
|
+
*/
|
|
87
|
+
const DOMAIN_RULES = [
|
|
88
|
+
// ── Social media: heavy JS, aggressive anti-bot ──────────────────────
|
|
89
|
+
{
|
|
90
|
+
pattern: 'twitter.com',
|
|
91
|
+
overrides: {
|
|
92
|
+
'simple': { quality: 20, speed: 95 },
|
|
93
|
+
'stealth': { quality: 90 },
|
|
94
|
+
'cloaked': { quality: 85 },
|
|
95
|
+
'browser': { quality: 80 },
|
|
96
|
+
},
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
pattern: 'x.com',
|
|
100
|
+
overrides: {
|
|
101
|
+
'simple': { quality: 20, speed: 95 },
|
|
102
|
+
'stealth': { quality: 90 },
|
|
103
|
+
'cloaked': { quality: 85 },
|
|
104
|
+
'browser': { quality: 80 },
|
|
105
|
+
},
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
pattern: 'instagram.com',
|
|
109
|
+
overrides: {
|
|
110
|
+
'simple': { quality: 15 },
|
|
111
|
+
'stealth': { quality: 90 },
|
|
112
|
+
'cloaked': { quality: 88 },
|
|
113
|
+
'browser': { quality: 75 },
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
pattern: 'tiktok.com',
|
|
118
|
+
overrides: {
|
|
119
|
+
'simple': { quality: 15 },
|
|
120
|
+
'stealth': { quality: 90 },
|
|
121
|
+
'cloaked': { quality: 88 },
|
|
122
|
+
'browser': { quality: 70 },
|
|
123
|
+
},
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
pattern: 'facebook.com',
|
|
127
|
+
overrides: {
|
|
128
|
+
'simple': { quality: 20 },
|
|
129
|
+
'stealth': { quality: 88 },
|
|
130
|
+
'cloaked': { quality: 85 },
|
|
131
|
+
},
|
|
132
|
+
},
|
|
133
|
+
{
|
|
134
|
+
pattern: 'linkedin.com',
|
|
135
|
+
overrides: {
|
|
136
|
+
'simple': { quality: 25 },
|
|
137
|
+
'stealth': { quality: 88 },
|
|
138
|
+
'browser': { quality: 78 },
|
|
139
|
+
},
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
pattern: 'reddit.com',
|
|
143
|
+
overrides: {
|
|
144
|
+
'simple': { quality: 30 },
|
|
145
|
+
'browser': { quality: 88 },
|
|
146
|
+
'stealth': { quality: 85 },
|
|
147
|
+
},
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
pattern: 'threads.net',
|
|
151
|
+
overrides: {
|
|
152
|
+
'simple': { quality: 15 },
|
|
153
|
+
'stealth': { quality: 90 },
|
|
154
|
+
'cloaked': { quality: 85 },
|
|
155
|
+
},
|
|
156
|
+
},
|
|
157
|
+
// ── SPA-heavy / JS-rendered sites ────────────────────────────────────
|
|
158
|
+
{
|
|
159
|
+
pattern: 'vercel.app',
|
|
160
|
+
overrides: {
|
|
161
|
+
'browser': { quality: 90 },
|
|
162
|
+
'simple': { quality: 50 },
|
|
163
|
+
},
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
pattern: 'netlify.app',
|
|
167
|
+
overrides: {
|
|
168
|
+
'browser': { quality: 90 },
|
|
169
|
+
'simple': { quality: 50 },
|
|
170
|
+
},
|
|
171
|
+
},
|
|
172
|
+
{
|
|
173
|
+
pattern: 'notion.so',
|
|
174
|
+
overrides: {
|
|
175
|
+
'browser': { quality: 92 },
|
|
176
|
+
'simple': { quality: 20 },
|
|
177
|
+
},
|
|
178
|
+
},
|
|
179
|
+
{
|
|
180
|
+
pattern: 'figma.com',
|
|
181
|
+
overrides: {
|
|
182
|
+
'browser': { quality: 90 },
|
|
183
|
+
'simple': { quality: 15 },
|
|
184
|
+
},
|
|
185
|
+
},
|
|
186
|
+
// ── Static / well-structured sites ───────────────────────────────────
|
|
187
|
+
{
|
|
188
|
+
pattern: 'wikipedia.org',
|
|
189
|
+
overrides: {
|
|
190
|
+
'simple': { quality: 92 },
|
|
191
|
+
'browser': { quality: 80, cost: 70 },
|
|
192
|
+
},
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
pattern: 'github.com',
|
|
196
|
+
overrides: {
|
|
197
|
+
'simple': { quality: 85 },
|
|
198
|
+
'browser': { quality: 78, cost: 65 },
|
|
199
|
+
},
|
|
200
|
+
},
|
|
201
|
+
{
|
|
202
|
+
pattern: 'stackoverflow.com',
|
|
203
|
+
overrides: {
|
|
204
|
+
'simple': { quality: 88 },
|
|
205
|
+
'browser': { quality: 78 },
|
|
206
|
+
},
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
pattern: 'docs.python.org',
|
|
210
|
+
overrides: {
|
|
211
|
+
'simple': { quality: 90 },
|
|
212
|
+
},
|
|
213
|
+
},
|
|
214
|
+
{
|
|
215
|
+
pattern: 'developer.mozilla.org',
|
|
216
|
+
overrides: {
|
|
217
|
+
'simple': { quality: 90 },
|
|
218
|
+
},
|
|
219
|
+
},
|
|
220
|
+
{
|
|
221
|
+
pattern: 'news.ycombinator.com',
|
|
222
|
+
overrides: {
|
|
223
|
+
'simple': { quality: 92 },
|
|
224
|
+
'browser': { quality: 75 },
|
|
225
|
+
},
|
|
226
|
+
},
|
|
227
|
+
// ── Known-blocked / aggressive anti-bot ──────────────────────────────
|
|
228
|
+
{
|
|
229
|
+
pattern: 'zillow.com',
|
|
230
|
+
overrides: {
|
|
231
|
+
'simple': { quality: 10 },
|
|
232
|
+
'browser': { quality: 50 },
|
|
233
|
+
'cloaked': { quality: 90 },
|
|
234
|
+
'stealth': { quality: 85 },
|
|
235
|
+
},
|
|
236
|
+
},
|
|
237
|
+
{
|
|
238
|
+
pattern: 'yelp.com',
|
|
239
|
+
overrides: {
|
|
240
|
+
'simple': { quality: 15 },
|
|
241
|
+
'cloaked': { quality: 88 },
|
|
242
|
+
'stealth': { quality: 82 },
|
|
243
|
+
},
|
|
244
|
+
},
|
|
245
|
+
{
|
|
246
|
+
pattern: 'pinterest.com',
|
|
247
|
+
overrides: {
|
|
248
|
+
'simple': { quality: 15 },
|
|
249
|
+
'cloaked': { quality: 88 },
|
|
250
|
+
'stealth': { quality: 85 },
|
|
251
|
+
},
|
|
252
|
+
},
|
|
253
|
+
{
|
|
254
|
+
pattern: 'ticketmaster.com',
|
|
255
|
+
overrides: {
|
|
256
|
+
'simple': { quality: 10 },
|
|
257
|
+
'cloaked': { quality: 90 },
|
|
258
|
+
'stealth': { quality: 82 },
|
|
259
|
+
},
|
|
260
|
+
},
|
|
261
|
+
];
|
|
262
|
+
/**
 * Returns domain-specific engine config overrides for a given hostname.
 *
 * Matching is case-insensitive and suffix-based on label boundaries: the
 * pattern "twitter.com" matches "twitter.com", "www.twitter.com" and
 * "mobile.twitter.com", but not "nottwitter.com". A single trailing root dot
 * ("twitter.com.") is ignored so fully-qualified hostnames match as well.
 *
 * When several rules match, their overrides are merged per engine in
 * DOMAIN_RULES order, so a later rule wins for any conflicting field.
 *
 * @param hostname - The hostname to look up (e.g. "www.twitter.com")
 * @returns Partial config overrides keyed by engine type, or an empty object
 */
export function getDomainOverrides(hostname) {
    // `new URL('https://twitter.com./').hostname` keeps the root dot, which
    // would otherwise defeat both the equality and the suffix comparison.
    const normalized = hostname.toLowerCase().replace(/\.$/, '');
    const merged = {};
    for (const { pattern, overrides } of DOMAIN_RULES) {
        const matches = normalized === pattern || normalized.endsWith(`.${pattern}`);
        if (!matches)
            continue;
        // Merge field-by-field so rules that touch the same engine accumulate.
        for (const [engine, override] of Object.entries(overrides)) {
            merged[engine] = { ...merged[engine], ...override };
        }
    }
    return merged;
}
|
|
286
|
+
// ---------------------------------------------------------------------------
|
|
287
|
+
// Fallback chain builder
|
|
288
|
+
// ---------------------------------------------------------------------------
|
|
289
|
+
/**
 * Produces a new engine config from `base` with `overrides` layered on top.
 *
 * Top-level fields are copied from `base` and then replaced by any field present
 * in `overrides`. `features` is merged one level deep into a fresh object, so
 * the result never shares its `features` object with `base`. The engine `type`
 * always comes from `base`, even if `overrides` carries a `type` field.
 *
 * @param base - Full engine config to start from (not mutated)
 * @param overrides - Partial config whose fields take precedence
 * @returns A new merged config object
 */
function applyOverrides(base, overrides) {
    const extraFeatures = overrides.features ?? {};
    const features = { ...base.features, ...extraFeatures };
    const merged = { ...base, ...overrides };
    // `features` and `type` are written last so overrides cannot clobber them.
    return { ...merged, features, type: base.type };
}
|
|
304
|
+
/**
 * Builds an ordered fallback chain of extraction engines for a given URL.
 *
 * The chain is constructed by:
 * 1. Starting with a private copy of each default engine configuration
 * 2. Applying domain-specific overrides looked up from the URL's hostname
 * 3. Filtering engines based on the provided options
 * 4. Sorting by quality descending (ties broken by speed descending)
 *
 * Every returned `config` (including its `features` object) is a fresh copy, so
 * callers may mutate chain entries without affecting the module-level defaults.
 * An unparseable `url` is not an error: the chain is built without any domain
 * overrides.
 *
 * @param url - The target URL to build a fallback chain for
 * @param options - Controls which engines are eligible:
 *   `noDomainApi` drops the "domain-api" engine; `render === false` drops every
 *   engine whose `features.javascript` is truthy, unless `stealth` is set and the
 *   engine's `features.stealth` is truthy.
 * @returns Ordered array of `{ engine, config }` entries, highest quality first
 *
 * @example
 * ```ts
 * // Illustrative only — the exact order depends on the configured defaults.
 *
 * // Basic chain for a static site
 * const chain = buildFallbackChain('https://wikipedia.org/wiki/Test');
 * // → [domain-api, simple, browser, stealth, cloaked, search-fallback]
 *
 * // Chain for a social media URL with rendering
 * const chain = buildFallbackChain('https://twitter.com/user', { render: true });
 * // → [domain-api, stealth, cloaked, browser, simple, search-fallback]
 *
 * // No browser rendering, no domain API
 * const chain = buildFallbackChain('https://example.com', {
 *   render: false,
 *   noDomainApi: true,
 * });
 * // → [simple, search-fallback]
 * ```
 */
export function buildFallbackChain(url, options = {}) {
    const { render, stealth, noDomainApi } = options;
    // 1. Parse hostname for domain overrides
    let hostname = '';
    try {
        hostname = new URL(url).hostname;
    }
    catch {
        // Invalid URL — proceed with no domain overrides
    }
    const domainOverrides = hostname ? getDomainOverrides(hostname) : {};
    // 2. Build full config for each engine (base + domain overrides)
    const configs = Object.keys(ENGINE_DEFAULTS).map((engineType) => {
        const defaults = ENGINE_DEFAULTS[engineType];
        // Copy `features` as well: a shallow spread alone would hand callers a
        // live reference into ENGINE_DEFAULTS for engines without an override.
        const base = { ...defaults, features: { ...defaults.features } };
        const override = domainOverrides[engineType];
        const config = override ? applyOverrides(base, override) : base;
        return { engine: engineType, config };
    });
    // 3. Filter engines based on options
    const filtered = configs.filter(({ engine, config }) => {
        // Remove domain-api if explicitly excluded
        if (noDomainApi && engine === 'domain-api')
            return false;
        // Only an explicit `render: false` removes JS-capable engines;
        // `undefined` leaves every engine eligible.
        if (render === false && config.features.javascript) {
            // Keep stealth-capable engines when stealth was explicitly requested
            return Boolean(stealth && config.features.stealth);
        }
        return true;
    });
    // 4. Sort by quality descending, tie-break by speed descending
    filtered.sort((a, b) => {
        const qualityDiff = b.config.quality - a.config.quality;
        if (qualityDiff !== 0)
            return qualityDiff;
        return b.config.speed - a.config.speed;
    });
    return filtered;
}
|
|
380
|
+
/**
 * Looks up the baseline configuration of one engine, ignoring domain overrides.
 *
 * The result is a copy: both the config object and its `features` object are
 * freshly allocated, so mutating them does not touch the shared defaults.
 *
 * @param type - The engine type to look up
 * @returns A copy of the default EngineConfig for `type`
 */
export function getEngineDefaults(type) {
    const defaults = ENGINE_DEFAULTS[type];
    const features = { ...defaults.features };
    return { ...defaults, features };
}
|
|
390
|
+
/**
 * Lists every engine type that has an entry in the default engine table.
 *
 * @returns A new array of engine type names, in the table's key order
 */
export function getAvailableEngines() {
    const engineTypes = Object.keys(ENGINE_DEFAULTS);
    return engineTypes;
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inline structured extraction using BYOK LLM
|
|
3
|
+
*
|
|
4
|
+
* After fetching page content, pass it + a JSON schema + optional prompt
|
|
5
|
+
* to an LLM and get back structured JSON matching the schema.
|
|
6
|
+
*
|
|
7
|
+
* Supports OpenAI, Anthropic, and Google (same BYOK pattern as /v1/answer).
|
|
8
|
+
*/
|
|
9
|
+
/** Identifier of a supported LLM backend. */
export type LLMProvider = 'openai' | 'anthropic' | 'google';
/** Caller-supplied settings for a single inline extraction request. */
export interface InlineExtractOptions {
    /** JSON Schema the extracted object should conform to */
    schema?: {
        [key: string]: any;
    };
    /** Free-form instruction describing what should be extracted */
    prompt?: string;
    /** Which LLM backend to call (required) */
    llmProvider: LLMProvider;
    /** The caller's own API key for that backend — BYOK (required) */
    llmApiKey: string;
    /** Model name; when omitted the provider's default model is used */
    llmModel?: string;
}
/** Outcome of an inline extraction request. */
export interface InlineExtractResult {
    /** The structured data parsed from the LLM response */
    data: {
        [key: string]: any;
    };
    /** Token counts reported for the request */
    tokensUsed: {
        input: number;
        output: number;
    };
}
/**
 * Turns page content into structured JSON by prompting an LLM with the
 * caller's own credentials (BYOK).
 *
 * @param content - Page content (markdown or text)
 * @param options - Schema and/or prompt, plus the LLM provider, key and optional model
 * @returns The extracted structured data together with token usage
 */
export declare function extractInlineJson(content: string, options: InlineExtractOptions): Promise<InlineExtractResult>;
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inline structured extraction using BYOK LLM
|
|
3
|
+
*
|
|
4
|
+
* After fetching page content, pass it + a JSON schema + optional prompt
|
|
5
|
+
* to an LLM and get back structured JSON matching the schema.
|
|
6
|
+
*
|
|
7
|
+
* Supports OpenAI, Anthropic, and Google (same BYOK pattern as /v1/answer).
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Picks the model name used when the caller did not specify one.
 *
 * @param provider - LLM provider identifier ('openai', 'anthropic' or 'google')
 * @returns The default model name, or `undefined` for an unrecognised provider
 */
function defaultModel(provider) {
    const defaults = new Map([
        ['openai', 'gpt-4o-mini'],
        ['anthropic', 'claude-3-5-sonnet-latest'],
        ['google', 'gemini-1.5-flash'],
    ]);
    return defaults.get(provider);
}
|
|
19
|
+
/**
 * Assembles the system prompt that tells the LLM to answer with bare JSON.
 *
 * The prompt always opens with the role and output-format rules and always ends
 * with a final "JSON only" reminder. The caller's instruction and the
 * pretty-printed JSON Schema are inserted in between when they are truthy.
 *
 * @param schema - Optional JSON Schema; serialised with 2-space indentation
 * @param prompt - Optional natural-language extraction instruction
 * @returns The prompt sections joined by newlines
 */
function buildSystemPrompt(schema, prompt) {
    const sections = [
        'You are a structured data extraction assistant.',
        'Extract data from the provided web page content and return ONLY valid JSON — no markdown fences, no explanation, no extra text.',
        ...(prompt ? [`\nInstruction: ${prompt}`] : []),
        ...(schema
            ? [`\nReturn a JSON object that conforms to this JSON Schema:\n${JSON.stringify(schema, null, 2)}`]
            : []),
        '\nReturn ONLY the JSON object.',
    ];
    return sections.join('\n');
}
|
|
33
|
+
/**
 * Caps page content at a maximum length before it is sent to the LLM.
 *
 * Lengths are measured the way `String.prototype.length` measures them
 * (UTF-16 code units). Content at or under the limit is returned unchanged;
 * longer content is cut and followed by a "[Content truncated]" marker.
 *
 * @param content - Text to cap
 * @param maxChars - Maximum number of characters to keep (default 24,000)
 * @returns The original text, or its first `maxChars` characters plus the marker
 */
function truncateContent(content, maxChars = 24_000) {
    const fits = content.length <= maxChars;
    if (fits) {
        return content;
    }
    const head = content.slice(0, maxChars);
    return head + '\n\n[Content truncated]';
}
|
|
38
|
+
/**
 * Parses an LLM reply as JSON, tolerating common wrapping around the payload.
 *
 * Candidates are tried in order and the first that parses is returned:
 * 1. the reply as-is;
 * 2. the body of the first markdown code fence (with or without a `json` tag);
 * 3. the span from the first `{` to the last `}`.
 *
 * Failures of steps 2 and 3 are logged via `console.debug` only when the
 * `DEBUG` environment variable is set.
 *
 * @param text - Raw text returned by the LLM
 * @returns Whatever `JSON.parse` yields for the first parseable candidate
 * @throws Error if no candidate parses; the message quotes the first 300 characters of `text`
 */
function parseJsonResponse(text) {
    // Wraps JSON.parse so each fallback reads as one line; `debugLabel` is null
    // for the direct attempt, which has never been logged.
    const attempt = (candidate, debugLabel) => {
        try {
            return { ok: true, value: JSON.parse(candidate) };
        }
        catch (e) {
            if (debugLabel !== null && process.env.DEBUG)
                console.debug('[webpeel]', debugLabel, e instanceof Error ? e.message : e);
            return { ok: false, value: undefined };
        }
    };
    const direct = attempt(text, null);
    if (direct.ok)
        return direct.value;
    // Strip markdown code fences if present
    const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (fenced) {
        const fromFence = attempt(fenced[1].trim(), 'fence json parse failed:');
        if (fromFence.ok)
            return fromFence.value;
    }
    // Fall back to the outermost { ... } span
    const open = text.indexOf('{');
    const close = text.lastIndexOf('}');
    if (open !== -1 && close > open) {
        const fromBraces = attempt(text.slice(open, close + 1), 'brace json parse failed:');
        if (fromBraces.ok)
            return fromBraces.value;
    }
    throw new Error(`LLM returned invalid JSON: ${text.slice(0, 300)}`);
}
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// Provider-specific calls (mirrors core/answer.ts patterns)
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
/**
 * Sends one chat-completion request to OpenAI and returns the reply text.
 *
 * The request uses temperature 0 and asks for a JSON-object response format.
 *
 * @param apiKey - OpenAI API key, sent as a Bearer token
 * @param model - Model name to use
 * @param systemPrompt - Content of the system message
 * @param userContent - Content of the user message
 * @returns `{ text, usage: { input, output } }`; text is trimmed, and missing
 *          fields fall back to an empty string / zero
 * @throws Error on a non-2xx response, including the status and any response body
 */
async function callOpenAI(apiKey, model, systemPrompt, userContent) {
    const payload = {
        model,
        messages: [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userContent },
        ],
        temperature: 0,
        response_format: { type: 'json_object' },
    };
    const resp = await fetch('https://api.openai.com/v1/chat/completions', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(payload),
    });
    if (!resp.ok) {
        // The error body is best-effort: a failed read just drops the detail.
        const errText = await resp.text().catch(() => '');
        const detail = errText ? ` - ${errText}` : '';
        throw new Error(`OpenAI API error: HTTP ${resp.status}${detail}`);
    }
    const json = (await resp.json());
    const reply = json?.choices?.[0]?.message?.content || '';
    const input = Number(json?.usage?.prompt_tokens || 0);
    const output = Number(json?.usage?.completion_tokens || 0);
    return { text: String(reply).trim(), usage: { input, output } };
}
|
|
103
|
+
/**
 * Sends one Messages API request to Anthropic and returns the reply text.
 *
 * The request uses temperature 0 and a 4096-token output cap. The reply text is
 * the concatenation of the `text` of every content block that has a string
 * `text`, trimmed.
 *
 * @param apiKey - Anthropic API key, sent in the `x-api-key` header
 * @param model - Model name to use
 * @param systemPrompt - Value of the request's `system` field
 * @param userContent - Content of the single user message
 * @returns `{ text, usage: { input, output } }`; missing usage counts become 0
 * @throws Error on a non-2xx response, including the status and any response body
 */
async function callAnthropic(apiKey, model, systemPrompt, userContent) {
    const payload = {
        model,
        system: systemPrompt,
        messages: [{ role: 'user', content: userContent }],
        max_tokens: 4096,
        temperature: 0,
    };
    const resp = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
            'anthropic-version': '2023-06-01',
        },
        body: JSON.stringify(payload),
    });
    if (!resp.ok) {
        // The error body is best-effort: a failed read just drops the detail.
        const errText = await resp.text().catch(() => '');
        const detail = errText ? ` - ${errText}` : '';
        throw new Error(`Anthropic API error: HTTP ${resp.status}${detail}`);
    }
    const json = (await resp.json());
    const blocks = Array.isArray(json?.content) ? json.content : [];
    let joined = '';
    for (const block of blocks) {
        // Blocks without a string `text` contribute nothing.
        if (typeof block?.text === 'string')
            joined += block.text;
    }
    const input = Number(json?.usage?.input_tokens || 0);
    const output = Number(json?.usage?.output_tokens || 0);
    return { text: joined.trim(), usage: { input, output } };
}
|
|
137
|
+
/**
 * Sends one `generateContent` request to the Google Generative Language API
 * and returns the reply text.
 *
 * The system prompt and user content are sent together as a single user turn,
 * separated by a blank line, with temperature 0 and a JSON response MIME type.
 * The API key travels in the `x-goog-api-key` header rather than the URL query
 * string, so it does not end up in request URLs recorded by proxies or logs.
 *
 * @param apiKey - Google API key
 * @param model - Model name; URL-encoded into the request path
 * @param systemPrompt - Instructions prepended to the user content
 * @param userContent - Page content to extract from
 * @returns `{ text, usage: { input, output } }`; text is the trimmed concatenation
 *          of the first candidate's string parts, and missing counts become 0
 * @throws Error on a non-2xx response, including the status and any response body
 */
async function callGoogle(apiKey, model, systemPrompt, userContent) {
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(model)}:generateContent`;
    const resp = await fetch(url, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            // Header auth keeps the secret out of the URL.
            'x-goog-api-key': apiKey,
        },
        body: JSON.stringify({
            contents: [
                {
                    role: 'user',
                    parts: [{ text: `${systemPrompt}\n\n${userContent}` }],
                },
            ],
            generationConfig: {
                temperature: 0,
                responseMimeType: 'application/json',
            },
        }),
    });
    if (!resp.ok) {
        // The error body is best-effort: a failed read just drops the detail.
        const errText = await resp.text().catch(() => '');
        throw new Error(`Google API error: HTTP ${resp.status}${errText ? ` - ${errText}` : ''}`);
    }
    const json = (await resp.json());
    const parts = json?.candidates?.[0]?.content?.parts;
    const text = Array.isArray(parts)
        ? parts.map((p) => (typeof p?.text === 'string' ? p.text : '')).join('')
        : '';
    return {
        text: String(text || '').trim(),
        usage: {
            input: Number(json?.usageMetadata?.promptTokenCount || 0),
            output: Number(json?.usageMetadata?.candidatesTokenCount || 0),
        },
    };
}
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
// Public API
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
/**
 * Turns page content into structured JSON by prompting an LLM with the
 * caller's own credentials (BYOK).
 *
 * The content is length-capped, combined with a system prompt built from the
 * schema and/or instruction, sent to the selected provider, and the reply is
 * parsed as JSON.
 *
 * @param content - Page content (markdown or text)
 * @param options - `schema` and/or `prompt` (at least one), `llmProvider`,
 *                  `llmApiKey`, and an optional `llmModel`; a blank model falls
 *                  back to the provider default
 * @returns `{ data, tokensUsed }` — the parsed JSON and the reported token usage
 * @throws Error when the key, the provider, or both schema and prompt are missing,
 *         when the provider is not supported, when the provider call fails, or
 *         when the reply is not parseable JSON
 */
export async function extractInlineJson(content, options) {
    const { schema, prompt, llmProvider, llmApiKey, llmModel } = options;
    if (!llmApiKey) {
        throw new Error('Inline extraction requires "llmApiKey" (BYOK)');
    }
    if (!llmProvider) {
        throw new Error('Inline extraction requires "llmProvider" (openai, anthropic, or google)');
    }
    if (!schema && !prompt) {
        throw new Error('Inline extraction requires "schema" or "prompt" (or both)');
    }
    const requestedModel = (llmModel || '').trim();
    const model = requestedModel || defaultModel(llmProvider);
    const systemPrompt = buildSystemPrompt(schema, prompt);
    const userContent = truncateContent(content);
    // Provider name → request function; all three share one call signature.
    const providerCalls = new Map([
        ['openai', callOpenAI],
        ['anthropic', callAnthropic],
        ['google', callGoogle],
    ]);
    const callProvider = providerCalls.get(llmProvider);
    if (!callProvider) {
        throw new Error(`Unsupported llmProvider: ${llmProvider}`);
    }
    const { text, usage } = await callProvider(llmApiKey, model, systemPrompt, userContent);
    return {
        data: parseJsonResponse(text),
        tokensUsed: usage,
    };
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-extract repeated listing patterns from HTML pages.
|
|
3
|
+
*
|
|
4
|
+
* Given raw HTML (e.g. an eBay search results page), this module detects the
|
|
5
|
+
* largest group of sibling elements with a consistent internal structure and
|
|
6
|
+
* extracts structured fields (title, price, image, link, description, rating)
|
|
7
|
+
* from each item.
|
|
8
|
+
*
|
|
9
|
+
* @module extract-listings
|
|
10
|
+
*/
|
|
11
|
+
/**
 * One item pulled out of a repeated listing on a page.
 *
 * Every named field is optional. The index signature allows further
 * string-valued keys beyond the named ones.
 */
export interface ListingItem {
    /** Item title, e.g. "Charizard VMAX 020/189" */
    title?: string;
    /** Price as displayed text, e.g. "$24.99" */
    price?: string;
    /** Image reference for the item */
    image?: string;
    /** Link associated with the item */
    link?: string;
    /** Descriptive text for the item */
    description?: string;
    /** Rating as text */
    rating?: string;
    /** Any additional string-valued field */
    [key: string]: string | undefined;
}
/**
 * Finds a repeated listing pattern in raw HTML and returns its items as
 * structured records.
 *
 * @param html - Raw HTML string to parse.
 * @param url - Optional base URL for resolving relative links and images.
 * @returns Array of extracted listing items (may be empty).
 *
 * @example
 * ```typescript
 * import { extractListings } from 'webpeel';
 *
 * const items = extractListings(ebayHtml, 'https://ebay.com/sch?q=card');
 * console.log(items[0].title); // "Charizard VMAX 020/189"
 * console.log(items[0].price); // "$24.99"
 * ```
 */
export declare function extractListings(html: string, url?: string): ListingItem[];
|