@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core types for WebPeel
|
|
3
|
+
*/
|
|
4
|
+
export interface PageAction {
|
|
5
|
+
type: 'wait' | 'click' | 'scroll' | 'type' | 'fill' | 'select' | 'press' | 'hover' | 'waitForSelector' | 'screenshot';
|
|
6
|
+
/** CSS selector for element-targeted actions */
|
|
7
|
+
selector?: string;
|
|
8
|
+
/**
|
|
9
|
+
* Value/text payload for actions like type/fill/select.
|
|
10
|
+
* Accepts Firecrawl-style `text` too (normalized internally).
|
|
11
|
+
*/
|
|
12
|
+
value?: string;
|
|
13
|
+
text?: string;
|
|
14
|
+
/** Keyboard key for press actions (e.g., "Enter") */
|
|
15
|
+
key?: string;
|
|
16
|
+
/** Wait duration for wait actions (ms). Firecrawl uses `milliseconds`. */
|
|
17
|
+
ms?: number;
|
|
18
|
+
milliseconds?: number;
|
|
19
|
+
/**
|
|
20
|
+
* Scroll target (absolute) — legacy/internal.
|
|
21
|
+
* Use direction+amount for relative scrolling.
|
|
22
|
+
*/
|
|
23
|
+
to?: 'top' | 'bottom' | number | {
|
|
24
|
+
x: number;
|
|
25
|
+
y: number;
|
|
26
|
+
};
|
|
27
|
+
/** Relative scroll direction (Firecrawl-style) */
|
|
28
|
+
direction?: 'up' | 'down' | 'left' | 'right';
|
|
29
|
+
/** Relative scroll amount in pixels (Firecrawl-style) */
|
|
30
|
+
amount?: number;
|
|
31
|
+
/** Per-action timeout override (ms) */
|
|
32
|
+
timeout?: number;
|
|
33
|
+
}
|
|
34
|
+
export interface ExtractOptions {
|
|
35
|
+
/** JSON Schema for structured output */
|
|
36
|
+
schema?: Record<string, any>;
|
|
37
|
+
/** CSS selectors mapped to field names */
|
|
38
|
+
selectors?: Record<string, string>;
|
|
39
|
+
/** Natural language prompt describing what to extract */
|
|
40
|
+
prompt?: string;
|
|
41
|
+
/** API key for LLM-powered extraction (OpenAI-compatible) */
|
|
42
|
+
llmApiKey?: string;
|
|
43
|
+
/** LLM model to use (default: gpt-4o-mini) */
|
|
44
|
+
llmModel?: string;
|
|
45
|
+
/** LLM API base URL (default: https://api.openai.com/v1) */
|
|
46
|
+
llmBaseUrl?: string;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Inline structured extraction options (BYOK, multi-provider).
|
|
50
|
+
* Used with /v1/fetch POST, /v2/scrape, and /v1/scrape (Firecrawl compat).
|
|
51
|
+
*/
|
|
52
|
+
export interface InlineExtractParam {
|
|
53
|
+
/** JSON Schema describing the desired output structure */
|
|
54
|
+
schema?: Record<string, any>;
|
|
55
|
+
/** Natural language prompt describing what to extract */
|
|
56
|
+
prompt?: string;
|
|
57
|
+
}
|
|
58
|
+
/** LLM provider for BYOK inline extraction */
|
|
59
|
+
export type InlineLLMProvider = 'openai' | 'anthropic' | 'google';
|
|
60
|
+
/**
 * Options accepted when fetching ("peeling") a page.
 * Every field is optional.
 */
export interface PeelOptions {
    /**
     * Lite mode — minimal processing for maximum speed.
     * Skips pruning, budget, quality scoring, and metadata extraction.
     * Just fetch → convert to markdown. ~50% faster on typical pages.
     */
    lite?: boolean;
    /** Use headless browser instead of simple HTTP fetch */
    render?: boolean;
    /** Use stealth mode to bypass bot detection (requires render=true, auto-enables if not set) */
    stealth?: boolean;
    /** Wait time in milliseconds after page load (only with render=true) */
    wait?: number;
    /** Output format */
    format?: 'markdown' | 'text' | 'html' | 'clean';
    /** Request timeout in milliseconds (default: 30000) */
    timeout?: number;
    /** Prepare streaming responses (API plumbing only; full SSE/chunked stream not yet implemented) */
    stream?: boolean;
    /** Custom user agent */
    userAgent?: string;
    /** Capture a screenshot of the page */
    screenshot?: boolean;
    /** Full-page screenshot (default: viewport only) */
    screenshotFullPage?: boolean;
    /** CSS selector to extract specific content (e.g., "article", ".main-content", "#post") */
    selector?: string;
    /** CSS selectors to exclude from content (e.g., [".sidebar", ".ads"]) */
    exclude?: string[];
    /** Only include content from these HTML elements (e.g., ['article', 'main', '.content']) */
    includeTags?: string[];
    /** Remove these HTML elements (e.g., ['nav', 'footer', 'header', '.sidebar']) */
    excludeTags?: string[];
    /** Custom HTTP headers to send */
    headers?: Record<string, string>;
    /** Cookies to set (key=value pairs) */
    cookies?: string[];
    /** Skip smart content extraction — return full page without stripping boilerplate */
    raw?: boolean;
    /**
     * Bypass domain-specific API extractors — force actual page scraping instead of API shortcuts.
     * Use when you want the REAL page content (e.g., AccuWeather forecasts) instead of
     * the domain extractor's API data (e.g., Open-Meteo weather data).
     */
    noDomainApi?: boolean;
    /** Page actions to execute before extraction (auto-enables render) */
    actions?: PageAction[];
    /** Extract structured data using a JSON schema or CSS selectors */
    extract?: ExtractOptions;
    /** Maximum token count for output (truncate intelligently if exceeded) */
    maxTokens?: number;
    /** Track content changes (stores local snapshots) */
    changeTracking?: boolean;
    /** Extract branding/design system (requires render=true) */
    branding?: boolean;
    /** Extract structured design analysis (requires render=true) */
    designAnalysis?: boolean;
    /** Generate AI summary of content */
    summary?: boolean | {
        prompt?: string;
        maxLength?: number;
    };
    /** LLM configuration for AI features (extraction, summary) */
    llm?: {
        apiKey: string;
        model?: string;
        baseUrl?: string;
    };
    /** Extract images from the page */
    images?: boolean;
    /** Location and language preferences for browser rendering */
    location?: {
        /** ISO 3166-1 alpha-2 country code (e.g., 'US', 'DE', 'JP') */
        country?: string;
        /** Language preferences (e.g., ['en-US', 'de']) */
        languages?: string[];
    };
    /**
     * Smart token budget — intelligently distill content to fit within N tokens.
     * Uses heuristic compression (not LLM): strips boilerplate, compresses tables,
     * removes low-density paragraphs. No API key required.
     * Different from maxTokens (simple truncation) — this is smart compression.
     */
    budget?: number;
    /**
     * Proxy URL for routing requests through a proxy server.
     * Supports HTTP, HTTPS, and SOCKS5 proxies.
     * Format: protocol://[user:pass@]host:port
     * Examples:
     *   'http://proxy.example.com:8080'
     *   'http://user:pass@proxy.example.com:8080'
     *   'socks5://user:pass@proxy.example.com:1080'
     */
    proxy?: string;
    /**
     * Array of proxy URLs for rotation. Tried in order on BlockedError.
     * Format: http://user:pass@host:port
     * When a proxy is blocked or fails, the next proxy in the list is tried automatically.
     * Takes precedence over the single `proxy` option when both are provided.
     */
    proxies?: string[];
    /**
     * Path to a persistent Chrome user-data-dir directory.
     * When set, cookies, history, and login sessions survive between fetch calls
     * in the same process. Each unique profileDir gets its own browser instance.
     *
     * Tip: use `--headed` first to log in, then run headless for automation.
     */
    profileDir?: string;
    /**
     * Launch the browser in headed (visible) mode.
     * Useful for setting up a profile (logging in) before running headless automation.
     * Only meaningful when `render` or `stealth` is true.
     */
    headed?: boolean;
    /**
     * Playwright storage state (cookies + localStorage) to inject into the browser context.
     * Loaded from a named profile's `storage-state.json` by the CLI profile system.
     * More reliable than `--user-data-dir` for session injection.
     */
    storageState?: any;
    /**
     * Enable agent-friendly defaults:
     * - budget: 4000 tokens (unless already set)
     * - format: 'markdown' (unless already set)
     *
     * Mirrors the CLI `--agent` flag for programmatic use.
     */
    agentMode?: boolean;
    /**
     * Disable content pruning and return the full page content.
     * By default, WebPeel automatically removes low-value blocks (sidebars,
     * footers, navigation, ads) using content density scoring.
     * Set to true to opt out and receive the complete page.
     */
    fullPage?: boolean;
    /**
     * Reader mode — extract only the main article content, strip all noise.
     * Like browser Reader Mode / Pocket / Instapaper but deterministic and fast.
     * Returns clean markdown with metadata header (title, author, date, reading time).
     * When enabled, readability metadata is included in result.readability.
     */
    readable?: boolean;
    /**
     * Intelligently scroll the page to load all lazy/infinite-scroll content
     * before extracting. Set to `true` for default settings or an object to
     * configure scroll behavior. Auto-enables browser rendering.
     *
     * @example
     * // Simple (use defaults: up to 20 scrolls, 30s timeout)
     * { autoScroll: true }
     *
     * // Customized
     * { autoScroll: { maxScrolls: 10, scrollDelay: 2000, timeout: 60000 } }
     */
    autoScroll?: boolean | import('./core/actions.js').AutoScrollOptions;
    /** Ask a question about the page content. Uses BM25 to find relevant passages — no LLM key needed. */
    question?: string;
    /** Device emulation: 'desktop' (default), 'mobile', 'tablet' */
    device?: 'desktop' | 'mobile' | 'tablet';
    /** Browser viewport width in pixels */
    viewportWidth?: number;
    /** Browser viewport height in pixels */
    viewportHeight?: number;
    /** Device scale factor (pixel density) for screenshots. Defaults to device profile value (desktop: 1, mobile: 3, tablet: 2). */
    deviceScaleFactor?: number;
    /** Wait condition: 'domcontentloaded' (default), 'networkidle', 'load', 'commit' */
    waitUntil?: 'domcontentloaded' | 'networkidle' | 'load' | 'commit';
    /** CSS selector to wait for before extracting content */
    waitSelector?: string;
    /** Block resource types for faster loading: 'image', 'stylesheet', 'font', 'media', 'script' */
    blockResources?: string[];
    /** Use CloakBrowser for maximum stealth (requires: npm install cloakbrowser) */
    cloaked?: boolean;
    /** Use CycleTLS TLS fingerprint spoofing (requires: npm install cycletls) */
    cycle?: boolean;
    /** Use PeelTLS TLS fingerprint spoofing */
    tls?: boolean;
    /**
     * Skip browser escalation on thin/shell pages.
     * When true, returns the simple HTTP result as-is without escalating to a headless browser.
     * Use for Q&A or high-throughput workloads where speed > JS-rendered completeness.
     */
    noEscalate?: boolean;
    /**
     * Per-user proxy context for bandwidth enforcement.
     * When set, proxy usage is checked against the user's tier limit before each proxy attempt.
     * Used internally by the API server — not exposed via CLI.
     */
    proxyContext?: {
        userId?: string;
        tier?: string;
    };
    /**
     * Enhance images that lack alt text with heuristic-generated descriptions.
     * Uses filename analysis, URL path segments, and nearby heading/paragraph context.
     * No LLM key required — purely text-based heuristics.
     * Default: false (opt-in). Set to true to enable.
     */
    captionImages?: boolean;
    /** Query to extract relevant highlights from the content (BM25-powered, no LLM needed) */
    highlightQuery?: string;
    /** Maximum characters for highlights (default: 1000) */
    highlightMaxChars?: number;
    /** Chunk content for RAG pipelines */
    chunk?: boolean | {
        /** Max tokens per chunk (default: 512) */
        maxTokens?: number;
        /** Overlap tokens between chunks (default: 50) */
        overlap?: number;
        /** Strategy: 'section' (default), 'paragraph', 'fixed' */
        strategy?: 'section' | 'paragraph' | 'fixed';
    };
}
|
|
272
|
+
/** Description of a single image found on a page. */
export interface ImageInfo {
    /** Absolute URL of the image */
    src: string;
    /** Alt text */
    alt: string;
    /** Title attribute */
    title?: string;
    /** Width if specified */
    width?: number;
    /** Height if specified */
    height?: number;
}
|
|
284
|
+
/**
 * Result of fetching a page.
 * The first eight fields are required; everything else is optional and is
 * populated according to the per-field notes below.
 */
export interface PeelResult {
    /** Final URL (after redirects) */
    url: string;
    /** Page title */
    title: string;
    /** Page content in requested format */
    content: string;
    /** Extracted metadata */
    metadata: PageMetadata;
    /** All links found on the page (absolute URLs, deduplicated) */
    links: string[];
    /** Estimated token count (rough: content.length / 4) */
    tokens: number;
    /** Method used: 'simple' | 'browser' | 'stealth' | 'cycle' | 'cloaked' | 'peeltls' | 'cf-worker' | 'google-cache' | 'ocr' */
    method: 'simple' | 'browser' | 'stealth' | 'cycle' | 'cloaked' | 'peeltls' | 'cf-worker' | 'google-cache' | 'ocr';
    /** Time elapsed in milliseconds */
    elapsed: number;
    /** Base64-encoded screenshot (PNG), only if screenshot option was set */
    screenshot?: string;
    /** Content type detected (html, json, xml, text, rss, etc.) */
    contentType?: string;
    /** Content quality score 0-1 (how clean the extraction was) */
    quality?: number;
    /** SHA256 hash of content (first 16 chars) — for change detection */
    fingerprint?: string;
    /** Extracted structured data (when extract option is used — CSS/heuristic extraction) */
    extracted?: Record<string, any>;
    /** Structured JSON from inline LLM extraction (when extract + llmProvider is used) */
    json?: Record<string, any>;
    /** Branding/design system profile */
    branding?: import('./core/branding.js').BrandingProfile;
    /** Structured design analysis */
    designAnalysis?: import('./core/design-analysis.js').DesignAnalysis;
    /** Content change tracking result */
    changeTracking?: import('./core/change-tracking.js').ChangeResult;
    /** AI-generated summary */
    summary?: string;
    /** Extracted images (when images option is set) */
    images?: ImageInfo[];
    /** Percentage of HTML pruned by content density scoring (0-100). Only present when pruning was applied. */
    prunedPercent?: number;
    /**
     * Readability extraction result (when readable option is true).
     * Contains title, author, date, reading time, excerpt, and word count.
     */
    readability?: import('./core/readability.js').ReadabilityResult;
    /** Domain-aware structured data (Twitter, Reddit, GitHub, HN). Present when URL matches a known domain. */
    domainData?: import('./ee/domain-extractors.js').DomainExtractResult;
    /** Quick answer result (when question option is set). BM25-powered, no LLM needed. */
    quickAnswer?: import('./core/quick-answer.js').QuickAnswerResult;
    /** Per-stage timing breakdown in milliseconds. */
    timing?: import('./core/timing.js').PipelineTiming;
    /**
     * Number of unique links found on the page (cheaper than full links array).
     * NOTE(review): previously documented as "always present", but the field is
     * declared optional — confirm against the producer before relying on it.
     */
    linkCount?: number;
    /** Schema.org type extracted from JSON-LD (e.g., "Recipe", "Product", "Article") */
    jsonLdType?: string;
    /** Content freshness metadata from HTTP response headers */
    freshness?: {
        lastModified?: string;
        etag?: string;
        fetchedAt: string;
        cacheControl?: string;
    };
    /** Warning message when content may be incomplete or degraded */
    warning?: string;
    /** True when the site appears to be blocking bot access (Cloudflare, Akamai, PerimeterX, etc.) */
    blocked?: boolean;
    /** True when the page requires authentication (login/signup wall detected). When true, use --profile <name> with a logged-in browser profile. */
    authRequired?: boolean;
    /** Non-fatal warnings about content quality or extraction issues */
    warnings?: string[];
    /** True when server returned pre-rendered markdown directly (Content-Type: text/markdown) */
    serverMarkdown?: boolean;
    /** Auto-interact summary: cookie banners / overlays dismissed before content extraction (browser mode only) */
    autoInteract?: import('./core/auto-interact.js').AutoInteractResult;
    /** Estimated tokens if you fed the raw HTML to an LLM (~4 chars per token) */
    rawTokenEstimate?: number;
    /** Token savings percentage compared to raw HTML (how much cheaper WebPeel is) */
    tokenSavingsPercent?: number;
    /** Trust & safety assessment of the fetched content */
    trust?: {
        /** Source credibility tier */
        source: {
            tier: 'official' | 'established' | 'community' | 'new' | 'suspicious';
            score: number;
            label: string;
            signals?: string[];
            warnings?: string[];
            /** Active runtime verification results (TLS, DNS, HTTP headers) */
            verification?: {
                tls: {
                    valid: boolean;
                    issuer: string;
                    daysRemaining: number;
                } | null;
                dns: {
                    hasMx: boolean;
                    hasDmarc: boolean;
                    hasSpf: boolean;
                } | null;
                headers: {
                    hsts: boolean;
                    csp: boolean;
                    server: string;
                } | null;
            };
        };
        /** Prompt injection scan result */
        contentSafety: {
            clean: boolean;
            injectionDetected: boolean;
            detectedPatterns: string[];
            strippedCount: number;
        };
        /** Google Safe Browsing + local blocklist result */
        safeBrowsing?: {
            safe: boolean;
            threats: string[];
            source: 'google-api' | 'local-blocklist' | 'unchecked';
        };
        /** Community threat intelligence feeds (URLhaus, PhishTank, OpenPhish) */
        threatFeeds?: {
            safe: boolean;
            threats: string[];
            source: 'urlhaus' | 'phishtank' | 'openphish' | 'none';
            details?: string;
        };
        /** Overall trust score 0-1 (composite of source + content safety) */
        score: number;
        /** Human-readable safety warnings */
        warnings: string[];
    };
    /** Query-relevant highlights (when highlightQuery option is set). BM25-ranked passages. */
    highlights?: Array<{
        text: string;
        score: number;
    }>;
    /** Convenience string: highlighted passages joined with double newlines */
    highlightedContent?: string;
    /** Content chunks (when chunk option is enabled) */
    chunks?: Array<{
        index: number;
        text: string;
        tokenCount: number;
        wordCount: number;
        section: string | null;
        sectionDepth: number | null;
        startOffset: number;
        endOffset: number;
    }>;
    /** Safe Browsing check result (Google API + local blocklist + community threat feeds) */
    safeBrowsing?: {
        safe: boolean;
        threats: string[];
        source: 'google-api' | 'local-blocklist' | 'unchecked';
        /** Community threat intelligence feed results (URLhaus, PhishTank, OpenPhish) */
        threatFeeds?: {
            safe: boolean;
            threats: string[];
            source: 'urlhaus' | 'phishtank' | 'openphish' | 'none';
            details?: string;
        };
    };
}
|
|
448
|
+
/**
 * Metadata extracted from a page or document.
 * All named fields are optional; the index signature admits additional
 * document-specific keys.
 */
export interface PageMetadata {
    /** Meta description */
    description?: string;
    /** Author name */
    author?: string;
    /** Published date (ISO 8601) */
    published?: string;
    /** Open Graph image URL */
    image?: string;
    /** Canonical URL */
    canonical?: string;
    /** MIME content type (set for documents like PDF/DOCX) */
    contentType?: string;
    /** Word count (set for documents like PDF/DOCX, and HTML pages) */
    wordCount?: number;
    /** Page count (set for PDF documents) */
    pages?: number;
    /** Publish date extracted from rich meta sources (ISO 8601) */
    publishDate?: string;
    /** Page language (e.g. "en", "en-US") */
    language?: string;
    /** Allow additional document-specific metadata */
    [key: string]: any;
}
|
|
472
|
+
/**
 * Unified response envelope for JSON CLI output (--json flag).
 *
 * All JSON output paths use this schema regardless of which flags are
 * combined (--extract-all, --extract, --meta, etc.). Existing PeelResult
 * fields are always preserved for backward compatibility — the envelope
 * adds a consistent set of required fields on top.
 */
export interface PeelEnvelope {
    /** Final URL (after redirects) */
    url: string;
    /** HTTP status code — always 200 for successful fetches */
    status: number;
    /** Page content in markdown/text format */
    content: string;
    /**
     * Structured data extracted by --extract-all or --extract.
     * Present only when extraction was requested.
     */
    structured?: Record<string, unknown>[];
    /** Page metadata (title, description, author, OG tags, etc.) */
    metadata: {
        title?: string;
        description?: string;
        author?: string;
        [key: string]: unknown;
    };
    /** Estimated token count of content (rough: content.length / 4) */
    tokens: number;
    /** Whether this result was served from the local cache */
    cached: boolean;
    /** Total time elapsed in milliseconds */
    elapsed: number;
    /**
     * True when --budget was applied and content was distilled to fit.
     * For listings: true when fewer items are returned than available.
     */
    truncated?: boolean;
    /**
     * Total items available before budget limiting (for listings only).
     * Present only when truncated=true and using --extract-all.
     */
    totalAvailable?: number;
}
|
|
516
|
+
/**
 * Programmatic error classification for fetch failures.
 * Returned in the `error.type` field of API error responses.
 *
 * - `timeout`      — Site took too long to respond
 * - `blocked`      — Site actively blocked the request (403, CAPTCHA, bot detection)
 * - `not_found`    — 404 or the domain/URL does not exist
 * - `server_error` — Target site returned a 5xx error
 * - `network`      — DNS failure, connection refused, or other network-level issue
 * - `unknown`      — Unclassified error
 */
export type FetchErrorType = 'timeout' | 'blocked' | 'not_found' | 'server_error' | 'network' | 'unknown';
|
|
528
|
+
/**
|
|
529
|
+
* Backward-compatible WebPeelError wrapper.
|
|
530
|
+
*
|
|
531
|
+
* Extends the new typed WebPeelError from errors.ts so that:
|
|
532
|
+
* - `new WebPeelError('message')` still works (legacy callers)
|
|
533
|
+
* - `instanceof` checks work against both old and new versions
|
|
534
|
+
* - All the rich typed fields (retryable, statusCode, context, timestamp) are available
|
|
535
|
+
*
|
|
536
|
+
* New code should import from './errors.js' directly for the full typed API.
|
|
537
|
+
*/
|
|
538
|
+
import { WebPeelError as TypedWebPeelError } from './errors.js';
|
|
539
|
+
export { type ErrorCode } from './errors.js';
|
|
540
|
+
/** General WebPeel error; constructed from a message and an optional error code. */
export declare class WebPeelError extends TypedWebPeelError {
    constructor(message: string, code?: string);
}
|
|
543
|
+
/** Timeout error; constructed from a message only. */
export declare class TimeoutError extends TypedWebPeelError {
    constructor(message: string);
}
|
|
546
|
+
/** Blocked-request error; always carries `blocked = true` and accepts an optional retryable flag. */
export declare class BlockedError extends TypedWebPeelError {
    /** Literal marker identifying this error as a block. */
    readonly blocked = true;
    constructor(message: string, retryable?: boolean);
}
|
|
550
|
+
/** Network-level error; constructed from a message only. */
export declare class NetworkError extends TypedWebPeelError {
    constructor(message: string);
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core types for WebPeel
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Backward-compatible WebPeelError wrapper.
|
|
6
|
+
*
|
|
7
|
+
* Extends the new typed WebPeelError from errors.ts so that:
|
|
8
|
+
* - `new WebPeelError('message')` still works (legacy callers)
|
|
9
|
+
* - `instanceof` checks work against both old and new versions
|
|
10
|
+
* - All the rich typed fields (retryable, statusCode, context, timestamp) are available
|
|
11
|
+
*
|
|
12
|
+
* New code should import from './errors.js' directly for the full typed API.
|
|
13
|
+
*/
|
|
14
|
+
import { WebPeelError as TypedWebPeelError } from './errors.js';
|
|
15
|
+
/**
 * General WebPeel error that keeps the legacy `(message, code)` constructor.
 * Forwards to the typed base error as non-retryable.
 */
export class WebPeelError extends TypedWebPeelError {
    /**
     * @param {string} message - Human-readable error description.
     * @param {string} [code] - Error code; any falsy value falls back to 'UNKNOWN'.
     */
    constructor(message, code) {
        // The base constructor takes the code first, then the message.
        super(code || 'UNKNOWN', message, { retryable: false });
        this.name = 'WebPeelError';
    }
}
|
|
21
|
+
/**
 * Timeout error. Reported to the base error with code 'TIMEOUT',
 * marked retryable, with status code 504.
 */
export class TimeoutError extends TypedWebPeelError {
    /**
     * @param {string} message - Human-readable error description.
     */
    constructor(message) {
        super('TIMEOUT', message, { retryable: true, statusCode: 504 });
        this.name = 'TimeoutError';
    }
}
|
|
27
|
+
/**
 * Blocked-request error. Reported to the base error with code 'BLOCKED'
 * and status code 403; retryable unless the caller says otherwise.
 */
export class BlockedError extends TypedWebPeelError {
    /** Marker field, always true on instances of this class. */
    blocked = true;
    /**
     * @param {string} message - Human-readable error description.
     * @param {boolean} [retryable=true] - Whether the request may be retried.
     */
    constructor(message, retryable = true) {
        super('BLOCKED', message, { retryable, statusCode: 403 });
        this.name = 'BlockedError';
    }
}
|
|
34
|
+
/**
 * Network-level error. Reported to the base error with code 'NETWORK_ERROR',
 * marked retryable, with status code 502.
 */
export class NetworkError extends TypedWebPeelError {
    /**
     * @param {string} message - Human-readable error description.
     */
    constructor(message) {
        super('NETWORK_ERROR', message, { retryable: true, statusCode: 502 });
        this.name = 'NetworkError';
    }
}
|