@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Copyright (c) 2026 WebPeel, Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
The WebPeel SDK is provided under the following terms:
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to use,
|
|
7
|
+
copy, and modify the Software for the purpose of interacting with WebPeel's
|
|
8
|
+
services, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
1. The Software may only be used in connection with WebPeel's API services.
|
|
11
|
+
2. Redistribution of the Software is permitted provided this license notice
|
|
12
|
+
is included in all copies.
|
|
13
|
+
3. The Software is provided "AS IS", without warranty of any kind.
|
|
14
|
+
|
|
15
|
+
For the full terms of service, visit https://webpeel.dev/terms
|
package/README.md
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<a href="https://webpeel.dev">
|
|
3
|
+
<img src=".github/banner.svg" alt="WebPeel — Web data API for AI agents" width="100%">
|
|
4
|
+
</a>
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/v/webpeel.svg?style=flat-square" alt="npm version"></a>
|
|
9
|
+
<a href="https://www.npmjs.com/package/webpeel"><img src="https://img.shields.io/npm/dm/webpeel.svg?style=flat-square" alt="npm downloads"></a>
|
|
10
|
+
<a href="https://github.com/webpeel/webpeel/stargazers"><img src="https://img.shields.io/github/stars/webpeel/webpeel?style=flat-square" alt="GitHub stars"></a>
|
|
11
|
+
<a href="https://github.com/webpeel/webpeel/actions/workflows/ci.yml"><img src="https://github.com/webpeel/webpeel/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
12
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/license-WebPeel%20SDK-blue.svg?style=flat-square" alt="License"></a>
|
|
13
|
+
</p>
|
|
14
|
+
|
|
15
|
+
<h3 align="center">The web data layer for AI agents.<br>Fetch, search, crawl, extract, screenshot — one call, zero boilerplate.</h3>
|
|
16
|
+
|
|
17
|
+
<p align="center">
|
|
18
|
+
<a href="#quick-start">Quick Start</a> ·
|
|
19
|
+
<a href="#agent-native-integrations">Agent Integrations</a> ·
|
|
20
|
+
<a href="https://webpeel.dev/docs">Docs</a> ·
|
|
21
|
+
<a href="https://webpeel.dev/playground">Playground</a> ·
|
|
22
|
+
<a href="https://app.webpeel.dev/signup">Get API Key</a>
|
|
23
|
+
</p>
|
|
24
|
+
|
|
25
|
+
<p align="center">
|
|
26
|
+
<img src=".github/readme-demo.svg" alt="WebPeel demo showing agent-friendly web fetch input, automatic engine selection, and clean JSON output" width="100%">
|
|
27
|
+
</p>
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## The Problem
|
|
32
|
+
|
|
33
|
+
Every AI agent that touches the web rebuilds the same brittle stack: HTTP fetch → headless browser → anti-bot bypass → HTML cleanup → markdown conversion → token budgeting. Each layer fails differently. Sites change. Cloudflare rotates challenges. Your agent gets empty strings at 2 AM and your pipeline breaks.
|
|
34
|
+
|
|
35
|
+
**WebPeel replaces that entire stack with one function call.** It handles engine selection, anti-bot escalation, domain-specific extraction, and token optimization so your agent gets clean, structured data every time — without managing browsers, proxies, or parsing logic.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Zero-install — just run it
|
|
43
|
+
npx webpeel "https://example.com"
|
|
44
|
+
|
|
45
|
+
# Search the web
|
|
46
|
+
npx webpeel search "latest AI agent frameworks"
|
|
47
|
+
|
|
48
|
+
# Crawl an entire site
|
|
49
|
+
npx webpeel crawl docs.example.com --max-pages 50
|
|
50
|
+
|
|
51
|
+
# Screenshot any page
|
|
52
|
+
npx webpeel screenshot "https://stripe.com/pricing" --full-page
|
|
53
|
+
|
|
54
|
+
# Ask a question about any page
|
|
55
|
+
npx webpeel ask "https://arxiv.org/abs/2401.00001" "What is the main contribution?"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Or install globally:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
npm install -g webpeel
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Use as a library:**
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
import { peel } from 'webpeel';
|
|
68
|
+
|
|
69
|
+
const result = await peel('https://news.ycombinator.com');
|
|
70
|
+
console.log(result.markdown); // Clean markdown, ready for your LLM
|
|
71
|
+
console.log(result.metadata); // Title, tokens saved, timing, etc.
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Use via API:**
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
curl "https://api.webpeel.dev/v1/fetch?url=https://stripe.com/pricing" \
|
|
78
|
+
-H "Authorization: Bearer $WEBPEEL_API_KEY"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
```json
|
|
82
|
+
{
|
|
83
|
+
"url": "https://stripe.com/pricing",
|
|
84
|
+
"markdown": "# Stripe Pricing\n\n**Integrated per-transaction fees**...",
|
|
85
|
+
"metadata": {
|
|
86
|
+
"title": "Pricing & Fees | Stripe",
|
|
87
|
+
"tokens": 420,
|
|
88
|
+
"tokensOriginal": 8200,
|
|
89
|
+
"savingsPct": 94.9
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
[Get your free API key →](https://app.webpeel.dev/signup) · No credit card required · 500 requests/week free
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Why WebPeel
|
|
99
|
+
|
|
100
|
+
### 🧠 55+ Domain Extractors — Not Just HTML-to-Markdown
|
|
101
|
+
|
|
102
|
+
Generic scrapers convert raw HTML to markdown and call it a day. WebPeel has **purpose-built extractors** for 55+ domains — Reddit, GitHub, YouTube, Amazon, ArXiv, Hacker News, Wikipedia, StackOverflow, Zillow, Polymarket, ESPN, and more. Each extractor understands the site's structure and returns clean, structured data without browser rendering.
|
|
103
|
+
|
|
104
|
+
### ⚡ 65–98% Token Savings
|
|
105
|
+
|
|
106
|
+
Domain extractors strip navigation, ads, sidebars, and boilerplate *before* content reaches your agent. Less context consumed = lower costs, faster inference, and longer agent chains.
|
|
107
|
+
|
|
108
|
+
| Site | Raw HTML tokens | WebPeel tokens | Savings |
|
|
109
|
+
|------|:--------------:|:--------------:|:-------:|
|
|
110
|
+
| News article | 18,000 | 640 | **96%** |
|
|
111
|
+
| Reddit thread | 24,000 | 890 | **96%** |
|
|
112
|
+
| Wikipedia page | 31,000 | 2,100 | **93%** |
|
|
113
|
+
| GitHub README | 5,200 | 1,800 | **65%** |
|
|
114
|
+
| E-commerce product | 14,000 | 310 | **98%** |
|
|
115
|
+
|
|
116
|
+
### 🔄 6-Layer Engine Escalation
|
|
117
|
+
|
|
118
|
+
WebPeel doesn't just try one method — it automatically escalates through 6 engines until it gets a good result:
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
Simple HTTP → Domain API → Browser render → Stealth browser → Cloaked browser → Search cache fallback
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
No manual `--render` flags for most sites. WebPeel knows which sites need JavaScript, which need stealth, and which have anti-bot protection — and picks the right engine automatically.
|
|
125
|
+
|
|
126
|
+
### 🔌 Firecrawl-Compatible Migration Path
|
|
127
|
+
|
|
128
|
+
Already using Firecrawl-style workflows? WebPeel supports compatible `/v1/scrape`, `/v2/scrape`, `/v1/crawl`, `/v1/search`, and `/v1/map` endpoints, which makes migration dramatically easier than rebuilding your pipeline from scratch.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Agent-Native Integrations
|
|
133
|
+
|
|
134
|
+
### MCP Server (Claude, Cursor, Windsurf, VS Code)
|
|
135
|
+
|
|
136
|
+
Give any MCP-compatible AI the ability to browse, search, and extract from the web.
|
|
137
|
+
|
|
138
|
+
```json
|
|
139
|
+
{
|
|
140
|
+
"mcpServers": {
|
|
141
|
+
"webpeel": {
|
|
142
|
+
"command": "npx",
|
|
143
|
+
"args": ["-y", "webpeel", "mcp"],
|
|
144
|
+
"env": { "WEBPEEL_API_KEY": "wp_your_key_here" }
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
**7 MCP tools exposed:** `webpeel_read` · `webpeel_find` · `webpeel_see` · `webpeel_extract` · `webpeel_monitor` · `webpeel_act` · `webpeel_crawl`
|
|
151
|
+
|
|
152
|
+
[Full MCP setup guide →](https://webpeel.dev/docs/mcp)
|
|
153
|
+
|
|
154
|
+
### LangChain
|
|
155
|
+
|
|
156
|
+
```typescript
|
|
157
|
+
import { WebPeelLoader } from 'webpeel/integrations/langchain';
|
|
158
|
+
|
|
159
|
+
const loader = new WebPeelLoader({ url: 'https://example.com', render: true });
|
|
160
|
+
const docs = await loader.load();
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### LlamaIndex
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
import { WebPeelReader } from 'webpeel/integrations/llamaindex';
|
|
167
|
+
|
|
168
|
+
const reader = new WebPeelReader();
|
|
169
|
+
const docs = await reader.loadData('https://example.com');
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Python SDK
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
pip install webpeel
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from webpeel import WebPeel
|
|
180
|
+
|
|
181
|
+
wp = WebPeel(api_key="wp_...")
|
|
182
|
+
result = wp.fetch("https://example.com")
|
|
183
|
+
print(result.markdown)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Full Feature Set
|
|
189
|
+
|
|
190
|
+
| Capability | CLI | API | Details |
|
|
191
|
+
|-----------|:---:|:---:|---------|
|
|
192
|
+
| **Fetch & extract** | `webpeel "url"` | `GET /v1/fetch` | Clean markdown from any URL |
|
|
193
|
+
| **Web search** | `webpeel search "query"` | `GET /v1/search` | DuckDuckGo (free) or Brave (bring your own API key) |
|
|
194
|
+
| **Smart search** | — | `POST /v1/search/smart` | AI-powered structured results |
|
|
195
|
+
| **Crawl sites** | `webpeel crawl "url"` | `POST /v1/crawl` | Depth/page limits, rate control |
|
|
196
|
+
| **Screenshots** | `webpeel screenshot "url"` | `POST /v1/screenshot` | Full-page, multi-viewport, visual diff, filmstrip |
|
|
197
|
+
| **Structured extraction** | `--extract-schema` | `POST /v1/extract` | JSON schema → structured data |
|
|
198
|
+
| **Q&A** | `webpeel ask "url" "q"` | `POST /v1/answer` | Answer questions about any page |
|
|
199
|
+
| **Deep research** | — | `POST /v1/deep-research` | Multi-query autonomous research |
|
|
200
|
+
| **Content monitoring** | `webpeel monitor "url"` | `POST /v1/watch` | Change detection with webhooks |
|
|
201
|
+
| **Browser sessions** | — | `POST /v1/session` | Persistent sessions for login flows |
|
|
202
|
+
| **Browser actions** | `--action 'click:.btn'` | actions field | Click, type, scroll, wait |
|
|
203
|
+
| **Batch scrape** | `webpeel batch file` | `POST /v1/batch/scrape` | Parallel multi-URL processing |
|
|
204
|
+
| **URL discovery** | `webpeel map "url"` | `POST /v1/map` | Sitemap and link discovery |
|
|
205
|
+
| **YouTube transcripts** | auto-detected | auto-detected | Multiple export formats |
|
|
206
|
+
| **PDF extraction** | auto-detected | auto-detected | Text, tables, structure |
|
|
207
|
+
| **Research agent** | — | `POST /v1/agent` | Autonomous multi-step research |
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Use Cases for Agent Builders
|
|
212
|
+
|
|
213
|
+
**RAG pipelines** — Fetch docs, articles, or entire sites as clean markdown ready for chunking, embedding, and retrieval.
|
|
214
|
+
|
|
215
|
+
**Price monitoring** — Track product pages across major commerce sites with structured extraction and change detection.
|
|
216
|
+
|
|
217
|
+
**Competitive intel** — Monitor competitor pages, pricing tables, and job boards. Visual diff screenshots catch layout changes CSS selectors would miss.
|
|
218
|
+
|
|
219
|
+
**Research agents** — Give Claude, Codex, Cursor, or your own agent grounded web access through the API or MCP server.
|
|
220
|
+
|
|
221
|
+
**Lead enrichment** — Pull company details, public links, and page structure from business sites without writing per-site parsers.
|
|
222
|
+
|
|
223
|
+
**Content aggregation** — Crawl and extract from communities, docs sites, and publications with domain-native extractors that understand each site's structure.
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Architecture
|
|
228
|
+
|
|
229
|
+
```
|
|
230
|
+
Your Agent
|
|
231
|
+
↓
|
|
232
|
+
WebPeel (npm / API / MCP)
|
|
233
|
+
↓
|
|
234
|
+
┌─────────────────────────────────┐
|
|
235
|
+
│ Engine Ranker │
|
|
236
|
+
│ HTTP → Domain API → Browser │
|
|
237
|
+
│ → Stealth → Cloaked → Cache │
|
|
238
|
+
├─────────────────────────────────┤
|
|
239
|
+
│ 55+ Domain Extractors │
|
|
240
|
+
│ reddit · github · youtube │
|
|
241
|
+
│ amazon · arxiv · zillow · ... │
|
|
242
|
+
├─────────────────────────────────┤
|
|
243
|
+
│ Content Pipeline │
|
|
244
|
+
│ Readability → Turndown → │
|
|
245
|
+
│ Token budgeting → Chunking │
|
|
246
|
+
└─────────────────────────────────┘
|
|
247
|
+
↓
|
|
248
|
+
Clean markdown / structured JSON
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## Reliability
|
|
254
|
+
|
|
255
|
+
WebPeel is built for production agent workflows, not just one-off demos.
|
|
256
|
+
|
|
257
|
+
- **Automated evals in-repo** — smart search and fetch eval suites ship with the codebase
|
|
258
|
+
- **Post-deploy gate** — critical checks run before calling a deploy healthy
|
|
259
|
+
- **Engine fallback chain** — when one fetch method fails, WebPeel escalates instead of giving up
|
|
260
|
+
- **Multiple surfaces, one core** — CLI, API, SDK, and MCP all ride the same extraction pipeline
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## Security
|
|
265
|
+
|
|
266
|
+
- **SSRF protection** — blocks localhost, private IPs, metadata endpoints, `file://` schemes
|
|
267
|
+
- **Helmet.js** — HSTS, X-Frame-Options, nosniff, XSS protection on all responses
|
|
268
|
+
- **Webhook signing** — HMAC-SHA256 on all outbound webhooks
|
|
269
|
+
- **API key hashing** — SHA-256 with granular scopes
|
|
270
|
+
- **Rate limiting** — sliding window, per-tier
|
|
271
|
+
- **Audit logging** — every API call logged with IP, key, and action
|
|
272
|
+
- **GDPR-compliant** — `DELETE /v1/account` for full data erasure
|
|
273
|
+
[Security policy →](https://webpeel.dev/security) · [SLA (99.9% uptime) →](https://webpeel.dev/sla)
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## Why teams choose WebPeel instead of stitching a stack together
|
|
278
|
+
|
|
279
|
+
| Approach | What it gives you | Where it breaks down |
|
|
280
|
+
|---|---|---|
|
|
281
|
+
| Raw HTTP + HTML parsing | Cheap, simple fetches | Falls apart on JS-heavy sites, anti-bot pages, and noisy HTML |
|
|
282
|
+
| Pure browser automation | Maximum control | Expensive, slow, fragile, and high-maintenance for large-scale use |
|
|
283
|
+
| Search-only APIs | Great discovery | Weak page extraction, limited structured output, limited downstream actions |
|
|
284
|
+
| Single-purpose scrapers | Fast on one job | You end up composing 4–6 tools for real agent workflows |
|
|
285
|
+
| **WebPeel** | Fetch + search + crawl + extraction + screenshots + monitoring in one layer | Opinionated toward agent workflows rather than generic scraping |
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Links
|
|
290
|
+
|
|
291
|
+
📖 [Documentation](https://webpeel.dev/docs) · 💰 [Pricing](https://webpeel.dev/pricing) · 🎮 [Playground](https://webpeel.dev/playground) · 📝 [Blog](https://webpeel.dev/blog) · 💬 [Discussions](https://github.com/webpeel/webpeel/discussions) · 🚀 [Releases](https://github.com/webpeel/webpeel/releases) · 📊 [Status](https://webpeel.dev/status) · 🔒 [Security](https://webpeel.dev/security) · 📋 [Changelog](https://webpeel.dev/changelog)
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
## Contributing
|
|
296
|
+
|
|
297
|
+
Pull requests welcome. Please open an issue first to discuss major changes.
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
git clone https://github.com/webpeel/webpeel.git
|
|
301
|
+
cd webpeel && npm install
|
|
302
|
+
npm run build && npm test
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## License
|
|
308
|
+
|
|
309
|
+
[WebPeel SDK License](LICENSE) — free for personal and commercial use with attribution.
|
|
310
|
+
|
|
311
|
+
<p align="center">
|
|
312
|
+
<a href="https://app.webpeel.dev/signup"><strong>Get started free →</strong></a>
|
|
313
|
+
</p>
|
package/dist/cache.d.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local response cache for WebPeel CLI
|
|
3
|
+
*
|
|
4
|
+
* Caches fetch results in ~/.webpeel/cache/ with TTL support.
|
|
5
|
+
* Cache key is a hash of URL + relevant options.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Parse a TTL string like "5m", "1h", "30s", "1d" into milliseconds
|
|
9
|
+
*/
|
|
10
|
+
export declare function parseTTL(ttl: string): number;
|
|
11
|
+
/**
|
|
12
|
+
* Get a cached result if it exists and hasn't expired; returns null otherwise
|
|
13
|
+
*/
|
|
14
|
+
export declare function getCache(url: string, options?: Record<string, any>): any | null;
|
|
15
|
+
/**
|
|
16
|
+
* Store a result in the cache under the key derived from `url` and `options`, with a lifetime of `ttlMs` milliseconds
|
|
17
|
+
*/
|
|
18
|
+
export declare function setCache(url: string, result: any, ttlMs: number, options?: Record<string, any>): void;
|
|
19
|
+
/**
|
|
20
|
+
* Clear expired cache entries, or every entry when `all` is true; returns the number of entries removed
|
|
21
|
+
*/
|
|
22
|
+
export declare function clearCache(all?: boolean): number;
|
|
23
|
+
/**
|
|
24
|
+
* Get cache stats: the number of cached entries, their total size in bytes, and the cache directory path
|
|
25
|
+
*/
|
|
26
|
+
export declare function cacheStats(): {
|
|
27
|
+
entries: number;
|
|
28
|
+
sizeBytes: number;
|
|
29
|
+
dir: string;
|
|
30
|
+
};
|
package/dist/cache.js
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local response cache for WebPeel CLI
|
|
3
|
+
*
|
|
4
|
+
* Caches fetch results in ~/.webpeel/cache/ with TTL support.
|
|
5
|
+
* Cache key is a hash of URL + relevant options.
|
|
6
|
+
*/
|
|
7
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, unlinkSync, statSync } from 'fs';
|
|
8
|
+
import { homedir } from 'os';
|
|
9
|
+
import { join } from 'path';
|
|
10
|
+
import { createHash } from 'crypto';
|
|
11
|
+
const CACHE_DIR = join(homedir(), '.webpeel', 'cache');
|
|
12
|
+
/**
 * Parse a TTL string like "5m", "1h", "30s", "1d" into milliseconds.
 *
 * @param {string} ttl - Non-negative decimal integer immediately followed by
 *   one unit: `s` (seconds), `m` (minutes), `h` (hours) or `d` (days).
 * @returns {number} The duration in milliseconds.
 * @throws {Error} If `ttl` is not a string in the `<digits><unit>` format.
 */
export function parseTTL(ttl) {
    // Guard the type first: calling `.match` on a non-string would surface as
    // an opaque TypeError instead of the documented format error.
    const match = typeof ttl === 'string' ? ttl.match(/^(\d+)(s|m|h|d)$/) : null;
    if (!match) {
        throw new Error(`Invalid TTL format: "${String(ttl)}". Use: 30s, 5m, 1h, 1d`);
    }
    // Explicit radix so the digits are always read as decimal.
    const value = Number.parseInt(match[1], 10);
    const unit = match[2];
    switch (unit) {
        case 's': return value * 1000;
        case 'm': return value * 60 * 1000;
        case 'h': return value * 60 * 60 * 1000;
        case 'd': return value * 24 * 60 * 60 * 1000;
        // Unreachable while the regex above only admits s|m|h|d; kept as a
        // safety net should the pattern and the switch ever drift apart.
        default: throw new Error(`Unknown TTL unit: ${unit}`);
    }
}
|
|
30
|
+
/**
 * Derive the cache key for a URL plus the options that take part in the key.
 *
 * Only `render`, `stealth`, `selector`, `format` and `readable` are included;
 * any other option is ignored, and falsy values collapse to their defaults
 * (`false`, `false`, `null`, `'markdown'`, `false`).
 *
 * @param {string} url - URL being cached.
 * @param {Record<string, any>} [options] - Fetch options.
 * @returns {string} First 16 hex characters of the SHA-256 digest of the
 *   normalized `{ url, ...options }` JSON.
 */
function cacheKey(url, options) {
    const { render, stealth, selector, format, readable } = options ?? {};
    // Property order is part of the key: JSON.stringify emits keys in
    // insertion order, so it must stay url, render, stealth, selector,
    // format, readable.
    const fingerprint = JSON.stringify({
        url,
        render: render || false,
        stealth: stealth || false,
        selector: selector || null,
        format: format || 'markdown',
        readable: readable || false,
    });
    const hasher = createHash('sha256');
    hasher.update(fingerprint);
    return hasher.digest('hex').slice(0, 16);
}
|
|
45
|
+
/**
 * Get a cached result if it exists and hasn't expired.
 *
 * @param {string} url - URL the result was cached under.
 * @param {Record<string, any>} [options] - Fetch options; forwarded to
 *   `cacheKey` to locate the entry.
 * @returns {any | null} The stored result, or `null` when there is no entry,
 *   the entry has expired (it is then deleted), or the entry cannot be read,
 *   parsed, or lacks valid timestamps.
 */
export function getCache(url, options) {
    const key = cacheKey(url, options);
    const filePath = join(CACHE_DIR, `${key}.json`);
    if (!existsSync(filePath))
        return null;
    try {
        const entry = JSON.parse(readFileSync(filePath, 'utf-8'));
        // A missing or non-numeric cachedAt/ttlMs would make the age check
        // below compare against NaN, which is always false, so a malformed
        // entry would never expire. Treat it as a cache miss instead.
        if (!Number.isFinite(entry.cachedAt) || !Number.isFinite(entry.ttlMs)) {
            return null;
        }
        const age = Date.now() - entry.cachedAt;
        if (age > entry.ttlMs) {
            // Expired — delete and return null
            try {
                unlinkSync(filePath);
            }
            catch (e) {
                if (process.env.DEBUG)
                    console.debug('[webpeel]', 'file unlink failed:', e instanceof Error ? e.message : e);
            }
            return null;
        }
        return entry.result;
    }
    catch (e) {
        // Unreadable or corrupt entry: still a cache miss, but surface the
        // reason under DEBUG like the other cache helpers in this file do.
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'cache file parse failed:', e instanceof Error ? e.message : e);
        return null;
    }
}
|
|
73
|
+
/**
 * Write a result to the cache directory, creating the directory if needed.
 *
 * The entry is stored as `<key>.json`, where the key comes from `cacheKey`.
 *
 * @param {string} url - URL the result belongs to.
 * @param {any} result - Value to cache; serialized with JSON.stringify.
 * @param {number} ttlMs - Lifetime of the entry in milliseconds.
 * @param {Record<string, any>} [options] - Fetch options; used for the key and
 *   stored alongside the entry as a JSON string.
 * @returns {void}
 */
export function setCache(url, result, ttlMs, options) {
    const dirMissing = !existsSync(CACHE_DIR);
    if (dirMissing) {
        mkdirSync(CACHE_DIR, { recursive: true });
    }
    const target = join(CACHE_DIR, `${cacheKey(url, options)}.json`);
    const payload = JSON.stringify({
        url,
        result,
        cachedAt: Date.now(),
        ttlMs,
        // Stored as a pre-serialized string; omitted entirely when no options were given.
        options: options ? JSON.stringify(options) : undefined,
    });
    writeFileSync(target, payload);
}
|
|
90
|
+
/**
 * Remove cache entries from the cache directory.
 *
 * Only `*.json` files are considered. A file that cannot be read, parsed or
 * deleted is skipped (the failure is reported under DEBUG) and not counted.
 *
 * @param {boolean} [all=false] - When true delete every entry; otherwise only
 *   entries whose age exceeds their stored `ttlMs`.
 * @returns {number} Number of files deleted.
 */
export function clearCache(all = false) {
    if (!existsSync(CACHE_DIR)) {
        return 0;
    }
    let removed = 0;
    const jsonFiles = readdirSync(CACHE_DIR).filter((name) => name.endsWith('.json'));
    for (const name of jsonFiles) {
        const target = join(CACHE_DIR, name);
        try {
            if (!all) {
                // Selective mode: keep the file unless its TTL has elapsed.
                const entry = JSON.parse(readFileSync(target, 'utf-8'));
                const expired = Date.now() - entry.cachedAt > entry.ttlMs;
                if (!expired) {
                    continue;
                }
            }
            unlinkSync(target);
            removed += 1;
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'cache file parse failed:', e instanceof Error ? e.message : e);
        }
    }
    return removed;
}
|
|
120
|
+
/**
 * Summarize the on-disk cache.
 *
 * @returns {{ entries: number, sizeBytes: number, dir: string }} Count of
 *   `*.json` files in the cache directory, the summed size of those that could
 *   be stat'ed, and the cache directory path. All counters are zero when the
 *   directory does not exist.
 */
export function cacheStats() {
    const stats = { entries: 0, sizeBytes: 0, dir: CACHE_DIR };
    if (!existsSync(CACHE_DIR)) {
        return stats;
    }
    const jsonFiles = readdirSync(CACHE_DIR).filter((name) => name.endsWith('.json'));
    // Every listed file counts as an entry, even if stat'ing it fails below.
    stats.entries = jsonFiles.length;
    for (const name of jsonFiles) {
        try {
            stats.sizeBytes += statSync(join(CACHE_DIR, name)).size;
        }
        catch (e) {
            if (process.env.DEBUG)
                console.debug('[webpeel]', 'stat file failed:', e instanceof Error ? e.message : e);
        }
    }
    return stats;
}
|