@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel - Fast web fetcher for AI agents
|
|
3
|
+
*
|
|
4
|
+
* Main library export
|
|
5
|
+
*/
|
|
6
|
+
import { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser } from './core/fetcher.js';
|
|
7
|
+
import { createContext, normalizeOptions, handleYouTube, fetchContent, detectContentType, parseContent, postProcess, finalize, buildResult, } from './core/pipeline.js';
|
|
8
|
+
import { checkUrlSafety } from './core/safe-browsing.js';
|
|
9
|
+
export * from './types.js';
|
|
10
|
+
export { WebPeelError as TypedWebPeelError, Errors, isRetryable, } from './errors.js';
|
|
11
|
+
export { withRetry, DomainRateLimiter, domainLimiter } from './core/retry.js';
|
|
12
|
+
// Domain extractors — compiled JS ships in npm, TypeScript source is .gitignore'd.
|
|
13
|
+
// Re-export types from the basic stub (always available), runtime functions via lazy wrapper.
|
|
14
|
+
export { getDomainExtractor, extractDomainData } from './ee/domain-extractors.js';
|
|
15
|
+
export { crawl } from './core/crawler.js';
|
|
16
|
+
export { discoverSitemap } from './core/sitemap.js';
|
|
17
|
+
export { mapDomain } from './core/map.js';
|
|
18
|
+
export { extractBranding } from './core/branding.js';
|
|
19
|
+
export { trackChange, getSnapshot, clearSnapshots } from './core/change-tracking.js';
|
|
20
|
+
export { extractWithLLM } from './core/extract.js';
|
|
21
|
+
export { extractDocumentToFormat, isPdfContentType, isDocxContentType } from './core/documents.js';
|
|
22
|
+
export { extractInlineJson } from './core/extract-inline.js';
|
|
23
|
+
export { runAgent } from './core/agent.js';
|
|
24
|
+
export { summarizeContent } from './core/summarize.js';
|
|
25
|
+
export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, providerStats, } from './core/search-provider.js';
|
|
26
|
+
export { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } from './core/search-engines.js';
|
|
27
|
+
export { crossVerifySearch } from './core/cross-verify.js';
|
|
28
|
+
export { answerQuestion, } from './core/answer.js';
|
|
29
|
+
export { parseGoogleSerp } from './core/google-serp-parser.js';
|
|
30
|
+
export { searchJobs } from './core/jobs.js';
|
|
31
|
+
export { RateGovernor, formatDuration, } from './core/rate-governor.js';
|
|
32
|
+
export { ApplicationTracker, } from './core/application-tracker.js';
|
|
33
|
+
export { applyToJob, loadApplications, saveApplication, getApplicationsToday, updateApplicationStatus, } from './core/apply.js';
|
|
34
|
+
// Human behavior exports — see bottom of file for full export
|
|
35
|
+
export { extractListings } from './core/extract-listings.js';
|
|
36
|
+
export { parseYouTubeUrl, extractVideoInfo, extractPlayerResponse, parseCaptionXml, decodeHtmlEntities, getYouTubeTranscript, } from './core/youtube.js';
|
|
37
|
+
export { formatTable } from './core/table-format.js';
|
|
38
|
+
export { findNextPageUrl } from './core/paginate.js';
|
|
39
|
+
export { distillToBudget, budgetListings, TOKENS_PER_LISTING_ITEM } from './core/budget.js';
|
|
40
|
+
export { watch, parseDuration, parseAssertion, } from './core/watch.js';
|
|
41
|
+
export { observe, } from './core/observe.js';
|
|
42
|
+
export { diffUrl, } from './core/diff.js';
|
|
43
|
+
export { extractReadableContent } from './core/readability.js';
|
|
44
|
+
export { quickAnswer } from './core/quick-answer.js';
|
|
45
|
+
export { extractValueFromPassage, smartExtractSchemaFields } from './core/schema-postprocess.js';
|
|
46
|
+
export { Timer } from './core/timing.js';
|
|
47
|
+
export { chunkContent } from './core/chunker.js';
|
|
48
|
+
/**
 * Forward a call to the optional `./core/search-fallback.js` module.
 *
 * The module is loaded lazily with a dynamic import, and every argument is
 * passed through untouched to its `searchFallback` export.
 *
 * @param _args - Arguments handed verbatim to the fallback implementation.
 * @returns Whatever the fallback implementation returns, or `null` when the
 *   module cannot be imported or the call throws synchronously. The call's
 *   result is not awaited inside the `try`, so a rejection of a promise it
 *   returns is not converted to `null` — it reaches the caller.
 */
export async function searchFallback(..._args) {
    let outcome = null;
    try {
        // @ts-ignore — proprietary module, gitignored
        const fallbackModule = await import('./core/search-fallback.js');
        // Call through the module object so the invocation shape stays the same.
        outcome = fallbackModule.searchFallback(..._args);
    }
    catch {
        // Best-effort: a missing module (or a synchronous throw) yields null.
        outcome = null;
    }
    return outcome;
}
|
|
58
|
+
export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS } from './core/peel-tls.js';
|
|
59
|
+
export { sanitizeForLLM } from './core/prompt-guard.js';
|
|
60
|
+
export { getSourceCredibility } from './core/source-credibility.js';
|
|
61
|
+
export { verifyDomain } from './core/domain-verify.js';
|
|
62
|
+
export { checkUrlSafety } from './core/safe-browsing.js';
|
|
63
|
+
export { checkThreatFeeds } from './core/threat-feeds.js';
|
|
64
|
+
export { detectLanguageFromUrl, buildAcceptLanguageHeader } from './core/language-detect.js';
|
|
65
|
+
export { localSearch } from './core/local-search.js';
|
|
66
|
+
export { getBusinessIntel } from './core/business-intel.js';
|
|
67
|
+
export { CircuitBreaker, browserCircuitBreaker } from './core/circuit-breaker.js';
|
|
68
|
+
export { checkMemoryPressure } from './core/browser-pool.js';
|
|
69
|
+
export { searchShopping, searchNews, searchImages, searchVideos } from './core/vertical-search.js';
|
|
70
|
+
/**
|
|
71
|
+
* Fetch and extract content from a URL
|
|
72
|
+
*
|
|
73
|
+
* @param url - URL to fetch
|
|
74
|
+
* @param options - Fetch options
|
|
75
|
+
* @returns Extracted content and metadata
|
|
76
|
+
*
|
|
77
|
+
* @example
|
|
78
|
+
* ```typescript
|
|
79
|
+
* import { peel } from 'webpeel';
|
|
80
|
+
*
|
|
81
|
+
* const result = await peel('https://example.com');
|
|
82
|
+
* console.log(result.content); // Markdown content
|
|
83
|
+
* console.log(result.metadata); // Structured metadata
|
|
84
|
+
* ```
|
|
85
|
+
*/
|
|
86
|
+
export async function peel(url, options = {}) {
|
|
87
|
+
const ctx = createContext(url, options);
|
|
88
|
+
normalizeOptions(ctx);
|
|
89
|
+
// Safe Browsing check — awaited before the page is fetched; advisory only (a flagged URL adds a warning, it does not abort the fetch)
|
|
90
|
+
const sbResult = await checkUrlSafety(url, process.env.SAFE_BROWSING_API_KEY);
|
|
91
|
+
ctx.safeBrowsingResult = sbResult;
|
|
92
|
+
if (!sbResult.safe) {
|
|
93
|
+
const threatList = sbResult.threats.join(', ');
|
|
94
|
+
ctx.warnings.push(`⚠️ URL flagged by Safe Browsing: ${threatList}`);
|
|
95
|
+
}
|
|
96
|
+
const ytResult = await handleYouTube(ctx);
|
|
97
|
+
if (ytResult) {
|
|
98
|
+
// Attach safe browsing to YouTube results too
|
|
99
|
+
return {
|
|
100
|
+
...ytResult,
|
|
101
|
+
safeBrowsing: sbResult,
|
|
102
|
+
...(ytResult.warnings || ctx.warnings.length > 0
|
|
103
|
+
? { warnings: [...(ytResult.warnings ?? []), ...ctx.warnings.filter(w => !ytResult.warnings?.includes(w))] }
|
|
104
|
+
: {}),
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
try {
|
|
108
|
+
await fetchContent(ctx);
|
|
109
|
+
detectContentType(ctx);
|
|
110
|
+
await parseContent(ctx);
|
|
111
|
+
await postProcess(ctx);
|
|
112
|
+
await finalize(ctx);
|
|
113
|
+
const result = buildResult(ctx);
|
|
114
|
+
// Attach safe browsing result
|
|
115
|
+
result.safeBrowsing = sbResult;
|
|
116
|
+
// ── Auto-render escalation (post-processing) ────────────────────────
|
|
117
|
+
// If final content is thin and we did NOT use browser rendering,
|
|
118
|
+
// retry with render=true. This catches SPAs that return enough SSR
|
|
119
|
+
// HTML to pass the fetch-time thin-content check but produce sparse
|
|
120
|
+
// extracted content (e.g. React shells with nav chrome only).
|
|
121
|
+
// Only escalate when: (a) not already rendered, (b) not explicitly
|
|
122
|
+
// opted out (noEscalate), (c) not a domain-extracted result, (d) HTML
|
|
123
|
+
// content type, (e) not a retry already.
|
|
124
|
+
const contentTokens = result.tokens ?? Math.ceil((result.content?.length ?? 0) / 4);
|
|
125
|
+
const wasRendered = ctx.render || options.render;
|
|
126
|
+
const hasDomainData = !!ctx.domainData;
|
|
127
|
+
const isHtml = (ctx.fetchResult?.contentType || '').includes('html');
|
|
128
|
+
const noEscalate = !!options.noEscalate;
|
|
129
|
+
const isRetry = !!options._autoRenderRetry;
|
|
130
|
+
// Don't escalate when user explicitly limited output size
|
|
131
|
+
const hasTokenBudget = !!(options.budget || options.maxTokens);
|
|
132
|
+
// Don't escalate if browser/stealth was already used — can't go higher
|
|
133
|
+
const fetchMethod = ctx.fetchResult?.method ?? '';
|
|
134
|
+
const alreadyBrowserOrStealth = fetchMethod === 'browser' || fetchMethod === 'stealth' ||
|
|
135
|
+
fetchMethod === 'browser-with-wait' || wasRendered;
|
|
136
|
+
if (contentTokens < 80 &&
|
|
137
|
+
!alreadyBrowserOrStealth &&
|
|
138
|
+
!hasDomainData &&
|
|
139
|
+
isHtml &&
|
|
140
|
+
!noEscalate &&
|
|
141
|
+
!isRetry &&
|
|
142
|
+
!hasTokenBudget &&
|
|
143
|
+
result.content &&
|
|
144
|
+
result.content.length < 400) {
|
|
145
|
+
// Retry with render — this is a one-shot escalation, not a loop
|
|
146
|
+
const retryResult = await peel(url, {
|
|
147
|
+
...options,
|
|
148
|
+
render: true,
|
|
149
|
+
noCache: true,
|
|
150
|
+
_autoRenderRetry: true,
|
|
151
|
+
});
|
|
152
|
+
// Only use the retry if it produced more content
|
|
153
|
+
if ((retryResult.tokens ?? 0) > contentTokens) {
|
|
154
|
+
retryResult.warnings = [
|
|
155
|
+
...(retryResult.warnings || []),
|
|
156
|
+
'Auto-escalated to browser rendering (initial fetch produced sparse content)',
|
|
157
|
+
];
|
|
158
|
+
return retryResult;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
return result;
|
|
162
|
+
}
|
|
163
|
+
catch (error) {
|
|
164
|
+
// Clean up browser resources on error
|
|
165
|
+
await cleanup();
|
|
166
|
+
throw error;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
/**
 * Fetch multiple URLs in batch with concurrency control.
 *
 * A fixed pool of workers pulls URLs from a shared cursor, so a slow URL only
 * occupies one worker. Results keep the order of `urls`. A URL that fails is
 * reported in place as `{ url, error }` rather than rejecting the batch.
 *
 * @param urls - Array of URLs to fetch
 * @param options - Fetch options passed to `peel`, plus:
 *   - `concurrency` (default 3): maximum number of parallel fetches. Values
 *     below 1 or NaN are treated as 1 so that every URL is still processed.
 *   - `onProgress(completed, total)`: called after each URL settles.
 * @returns Array of results or errors, index-aligned with `urls`
 *
 * @example
 * ```typescript
 * import { peelBatch } from 'webpeel';
 *
 * const urls = ['https://example.com', 'https://example.org'];
 * const results = await peelBatch(urls, { concurrency: 3 });
 * ```
 */
export async function peelBatch(urls, options = {}) {
    const { concurrency = 3, onProgress, ...peelOpts } = options;
    const results = new Array(urls.length);
    // Without this clamp, concurrency <= 0 (or NaN) would start zero workers and
    // silently return an array of empty slots.
    const requested = Math.floor(Number(concurrency));
    const maxWorkers = Number.isNaN(requested) || requested < 1 ? 1 : requested;
    let nextIndex = 0;
    let completedCount = 0;
    async function worker() {
        // Reading and incrementing the cursor happens synchronously between awaits,
        // so two workers never claim the same index.
        while (nextIndex < urls.length) {
            const index = nextIndex++;
            const url = urls[index];
            try {
                results[index] = await peel(url, peelOpts);
            }
            catch (error) {
                results[index] = {
                    url,
                    error: error instanceof Error ? error.message : 'Unknown error',
                };
            }
            completedCount++;
            onProgress?.(completedCount, urls.length);
        }
    }
    // Launch concurrent workers (true worker-pool, not sequential batches)
    const workerCount = Math.min(maxWorkers, urls.length);
    if (workerCount > 0) {
        await Promise.all(Array.from({ length: workerCount }, () => worker()));
    }
    return results;
}
|
|
213
|
+
/**
|
|
214
|
+
* Clean up any browser resources
|
|
215
|
+
* Call this when you're done using WebPeel
|
|
216
|
+
*/
|
|
217
|
+
export { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser };
|
|
218
|
+
export { getCached, setCached, clearCache, setCacheTTL } from './core/cache.js';
|
|
219
|
+
export { getRealisticUserAgent, getRandomUA, REALISTIC_USER_AGENTS, } from './core/user-agents.js';
|
|
220
|
+
export { humanDelay, humanMouseMove, humanRead, warmupBrowse, humanType, humanClearAndType, humanClick, humanScroll, humanScrollToElement, warmupSession, humanSelect, humanUploadFile, humanToggle, } from './core/human.js';
|
|
221
|
+
export { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates } from './core/schema-templates.js';
|
|
222
|
+
// Framework integrations
|
|
223
|
+
export { WebPeelLoader } from './integrations/langchain.js';
|
|
224
|
+
export { WebPeelReader } from './integrations/llamaindex.js';
|
|
225
|
+
// Advanced stealth utilities — for power users who want to apply extra evasions
|
|
226
|
+
// to their own Playwright pages.
|
|
227
|
+
// stealth-patches: proprietary module, loaded at runtime only
|
|
228
|
+
/**
 * Best-effort wrapper around the optional `./core/stealth-patches.js` module.
 *
 * Loads the module at call time and invokes its `applyStealthPatches(page)`.
 * Any failure — the module being absent or the patch call throwing — is
 * swallowed, so this function always resolves.
 *
 * @param page - Page object handed through to the stealth module
 */
export async function applyStealthPatches(page) {
    let stealthModule;
    try {
        // @ts-ignore — proprietary module, gitignored
        stealthModule = await import('./core/stealth-patches.js');
    }
    catch {
        /* not available */
        return;
    }
    try {
        await stealthModule.applyStealthPatches(page);
    }
    catch {
        /* best-effort: patch failures are ignored */
    }
}
|
|
236
|
+
/**
 * Best-effort wrapper around `applyAcceptLanguageHeader` from the optional
 * `./core/stealth-patches.js` module.
 *
 * Loads the module at call time and forwards `page` and `lang` to it. Any
 * failure — the module being absent or the call throwing — is swallowed, so
 * this function always resolves.
 *
 * @param page - Page object handed through to the stealth module
 * @param lang - Language value handed through to the stealth module
 */
export async function applyAcceptLanguageHeader(page, lang) {
    let stealthModule;
    try {
        // @ts-ignore — proprietary module, gitignored
        stealthModule = await import('./core/stealth-patches.js');
    }
    catch {
        /* not available */
        return;
    }
    try {
        await stealthModule.applyAcceptLanguageHeader(page, lang);
    }
    catch {
        /* best-effort: failures are ignored */
    }
}
|
|
244
|
+
// Google Cache fallback — fetch cached copies of blocked pages
|
|
245
|
+
export { fetchGoogleCache, isGoogleCacheAvailable } from './core/google-cache.js';
|
|
246
|
+
export { cfWorkerFetch, isCfWorkerAvailable } from './core/cf-worker-proxy.js';
|
|
247
|
+
/**
 * WebPeel client class — alternative OOP interface over the functional API.
 * Provides the same capabilities as the standalone functions but with
 * a configured client instance.
 *
 * Note: the stored `apiKey` is currently forwarded only by `crawl()`; the
 * other methods delegate to the local functions without it.
 *
 * @example
 * import { WebPeel } from 'webpeel';
 * const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
 * const result = await wp.fetch('https://stripe.com');
 */
export class WebPeel {
    apiKey;
    /**
     * @param config - Client configuration; `apiKey` is mandatory.
     * @throws Error when `config` is missing or has no `apiKey`.
     */
    constructor(config) {
        // Optional chaining: a missing config object should produce the same
        // descriptive error as a missing key, not a raw TypeError.
        if (!config?.apiKey)
            throw new Error('WebPeel: apiKey is required');
        this.apiKey = config.apiKey;
        // apiUrl reserved for future use (remote API proxy mode)
        void config.apiUrl;
    }
    /** Fetch and extract content from a URL (delegates to `peel`). */
    async fetch(url, options = {}) {
        return peel(url, { ...options });
    }
    /** Search the web using the provider selected from `options`. */
    async search(query, options = {}) {
        // Loaded on demand so the search module is only required when used.
        const { getSearchProvider } = await import('./core/search-provider.js');
        const provider = getSearchProvider({ ...options });
        return provider.searchWeb(query, options);
    }
    /** Crawl a site starting at `startUrl`; the client's `apiKey` is added to the options. */
    async crawl(startUrl, options = {}) {
        const { crawl: crawlFn } = await import('./core/crawler.js');
        return crawlFn(startUrl, { ...options, apiKey: this.apiKey });
    }
    /** Map a domain's pages */
    async map(url, options = {}) {
        const { mapDomain } = await import('./core/map.js');
        return mapDomain(url, { ...options });
    }
    /**
     * Extract structured data.
     *
     * NOTE(review): `_schema` is accepted but not used — the method currently
     * returns the plain `peel` result for `url`.
     */
    async extract(url, _schema, options = {}) {
        const result = await peel(url, { ...options });
        return result;
    }
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel LangChain.js Document Loader
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { WebPeelLoader } from 'webpeel/integrations/langchain';
|
|
6
|
+
* const loader = new WebPeelLoader({ url: 'https://example.com' });
|
|
7
|
+
* const docs = await loader.load();
|
|
8
|
+
*/
|
|
9
|
+
import type { PeelOptions } from '../types.js';
|
|
10
|
+
/**
 * Minimal LangChain-style document shape.
 * Declared locally so this package does not depend on LangChain itself.
 */
export interface Document {
    /** Text body of the document. */
    pageContent: string;
    /** Free-form metadata attached to the document. */
    metadata: Record<string, any>;
}
|
|
15
|
+
/** Options accepted by the `WebPeelLoader` constructor. */
export interface WebPeelLoaderOptions {
    /** URL to fetch. Used when `urls` is not supplied. */
    url: string;
    /** Multiple URLs to fetch; when supplied, this list is used instead of `url`. */
    urls?: string[];
    /**
     * Scraping mode: 'scrape' for single page, 'crawl' for following links.
     * NOTE(review): the loader implementation does not appear to read this
     * option — confirm before relying on 'crawl'.
     */
    mode?: 'scrape' | 'crawl';
    /** Output format (the loader falls back to 'markdown' when unset). */
    format?: 'markdown' | 'text' | 'html' | 'clean';
    /** Use headless browser */
    render?: boolean;
    /** Stealth mode for anti-bot */
    stealth?: boolean;
    /** Token budget per page */
    budget?: number;
    /** Proxy URL */
    proxy?: string;
    /** Multiple proxies for rotation */
    proxies?: string[];
    /** CSS selector to extract */
    selector?: string;
    /** Enable chunking for RAG: each chunk becomes its own document. */
    chunk?: boolean;
    /** Max tokens per chunk (default: 512) */
    chunkSize?: number;
    /** Chunk overlap tokens (default: 50) */
    chunkOverlap?: number;
    /** Additional PeelOptions; spread last, so they override the fields above. */
    peelOptions?: Partial<PeelOptions>;
}
|
|
45
|
+
/**
 * WebPeel Document Loader for LangChain.js
 *
 * Mirrors the shape of LangChain's BaseDocumentLoader (`load` / `lazyLoad`)
 * and produces `Document` objects carrying `pageContent` and `metadata`.
 */
export declare class WebPeelLoader {
    private options;
    /** @param options - Loader configuration (URL(s), format, chunking, ...). */
    constructor(options: WebPeelLoaderOptions);
    /**
     * Load documents from the configured URL(s).
     * With chunking enabled every chunk is returned as a separate Document.
     *
     * @returns All documents, in URL order.
     */
    load(): Promise<Document[]>;
    /**
     * Async-generator variant of `load()`: yields the same documents one at a
     * time. Intended for large URL lists.
     */
    lazyLoad(): AsyncGenerator<Document>;
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel LangChain.js Document Loader
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { WebPeelLoader } from 'webpeel/integrations/langchain';
|
|
6
|
+
* const loader = new WebPeelLoader({ url: 'https://example.com' });
|
|
7
|
+
* const docs = await loader.load();
|
|
8
|
+
*/
|
|
9
|
+
import { peel } from '../index.js';
|
|
10
|
+
import { chunkContent } from '../core/chunker.js';
|
|
11
|
+
/**
 * Fetch a single URL and turn the result into LangChain-style documents.
 *
 * Never throws: a failed fetch is reported as one empty document whose
 * metadata carries the error message.
 *
 * @param url - URL to fetch
 * @param options - The loader's options object
 * @returns One document per chunk when `options.chunk` is set, otherwise one
 *   document for the whole page.
 */
async function loadDocumentsForUrl(url, options) {
    try {
        const peelOpts = {
            format: options.format || 'markdown',
            render: options.render,
            stealth: options.stealth,
            budget: options.budget,
            proxy: options.proxy,
            proxies: options.proxies,
            selector: options.selector,
            // Spread last so explicit peelOptions win over the shortcuts above.
            ...options.peelOptions,
        };
        // Remove undefined values so they do not shadow peel's own defaults
        for (const key of Object.keys(peelOpts)) {
            if (peelOpts[key] === undefined)
                delete peelOpts[key];
        }
        const result = await peel(url, peelOpts);
        const fetchedAt = result.metadata?.fetchedAt || new Date().toISOString();
        const method = result.metadata?.method || 'unknown';
        if (!options.chunk) {
            // Single document per URL
            return [{
                    pageContent: result.content,
                    metadata: {
                        source: url,
                        title: result.metadata?.title || '',
                        description: result.metadata?.description || '',
                        wordCount: result.metadata?.wordCount || 0,
                        language: result.metadata?.language || '',
                        fetchedAt,
                        method,
                        contentType: result.metadata?.contentType || '',
                        statusCode: result.metadata?.statusCode || 0,
                    },
                }];
        }
        // Split into chunks, each becomes a Document
        const chunkResult = chunkContent(result.content, {
            maxTokens: options.chunkSize || 512,
            // `??` rather than `||`: an explicit overlap of 0 must be honoured.
            overlap: options.chunkOverlap ?? 50,
            strategy: 'section',
        });
        return chunkResult.chunks.map(chunk => ({
            pageContent: chunk.text,
            metadata: {
                source: url,
                title: result.metadata?.title || '',
                description: result.metadata?.description || '',
                chunkIndex: chunk.index,
                totalChunks: chunkResult.totalChunks,
                section: chunk.section,
                sectionDepth: chunk.sectionDepth,
                tokenCount: chunk.tokenCount,
                wordCount: chunk.wordCount,
                fetchedAt,
                method,
            },
        }));
    }
    catch (error) {
        // Include failed URLs as empty documents with error metadata
        return [{
                pageContent: '',
                metadata: {
                    source: url,
                    error: error instanceof Error ? error.message : String(error),
                    fetchedAt: new Date().toISOString(),
                },
            }];
    }
}
/**
 * WebPeel Document Loader for LangChain.js
 *
 * Compatible with LangChain's BaseDocumentLoader interface.
 * Returns Document[] with pageContent and metadata.
 */
export class WebPeelLoader {
    options;
    /** @param options - Loader configuration; `urls`, when set, is used instead of `url`. */
    constructor(options) {
        this.options = options;
    }
    /**
     * Load documents from the configured URL(s).
     * If chunking is enabled, each chunk becomes a separate Document.
     * URLs are fetched sequentially; a failed URL yields an empty document
     * with `metadata.error` instead of rejecting.
     */
    async load() {
        const urls = this.options.urls || [this.options.url];
        const documents = [];
        for (const url of urls) {
            documents.push(...(await loadDocumentsForUrl(url, this.options)));
        }
        return documents;
    }
    /**
     * Lazy load documents one at a time (async generator).
     * Each URL is fetched only when the consumer reaches it, so large URL
     * lists do not have to be fetched and held in memory up front.
     */
    async *lazyLoad() {
        const urls = this.options.urls || [this.options.url];
        for (const url of urls) {
            yield* await loadDocumentsForUrl(url, this.options);
        }
    }
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel LlamaIndex Reader
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { WebPeelReader } from 'webpeel/integrations/llamaindex';
|
|
6
|
+
* const reader = new WebPeelReader();
|
|
7
|
+
* const docs = await reader.loadData('https://example.com');
|
|
8
|
+
*/
|
|
9
|
+
import type { PeelOptions } from '../types.js';
|
|
10
|
+
/** Document shape produced by `WebPeelReader` for LlamaIndex consumers. */
export interface LlamaDocument {
    /** Text body of the document. */
    text: string;
    /** Free-form metadata attached to the document. */
    metadata: Record<string, any>;
    /** Optional document identifier. */
    id_?: string;
}
|
|
16
|
+
/** Options accepted by the `WebPeelReader` constructor. */
export interface WebPeelReaderOptions {
    /** Output format (the reader falls back to 'markdown' when unset). */
    format?: 'markdown' | 'text' | 'html' | 'clean';
    /** Use headless browser */
    render?: boolean;
    /** Stealth mode */
    stealth?: boolean;
    /** Token budget */
    budget?: number;
    /** Enable chunking: each chunk becomes its own document. */
    chunk?: boolean;
    /** Max tokens per chunk (the reader uses 512 when unset). */
    chunkSize?: number;
    /** Chunk overlap (the reader uses 50 when unset). */
    chunkOverlap?: number;
    /** Proxy URL */
    proxy?: string;
    /** Multiple proxies */
    proxies?: string[];
    /** Additional PeelOptions; spread last, so they override the fields above. */
    peelOptions?: Partial<PeelOptions>;
}
|
|
38
|
+
/**
 * WebPeel Reader for LlamaIndex
 *
 * Mirrors the shape of LlamaIndex's BaseReader interface (`loadData`).
 */
export declare class WebPeelReader {
    private options;
    /** @param options - Reader configuration; every field is optional. */
    constructor(options?: WebPeelReaderOptions);
    /**
     * Load data from one or more URLs.
     *
     * @param urlOrUrls - A single URL or a list of URLs.
     * @returns The documents produced for the given URL(s).
     */
    loadData(urlOrUrls: string | string[]): Promise<LlamaDocument[]>;
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel LlamaIndex Reader
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { WebPeelReader } from 'webpeel/integrations/llamaindex';
|
|
6
|
+
* const reader = new WebPeelReader();
|
|
7
|
+
* const docs = await reader.loadData('https://example.com');
|
|
8
|
+
*/
|
|
9
|
+
import { peel } from '../index.js';
|
|
10
|
+
import { chunkContent } from '../core/chunker.js';
|
|
11
|
+
/**
 * WebPeel Reader for LlamaIndex
 *
 * Compatible with LlamaIndex's BaseReader interface.
 */
export class WebPeelReader {
    options;
    /** @param options - Reader configuration; defaults to an empty object. */
    constructor(options = {}) {
        this.options = options;
    }
    /**
     * Load data from one or more URLs.
     *
     * URLs are fetched sequentially. With `chunk` enabled each chunk becomes
     * its own document (`id_` is `<url>#chunk-<index>`); otherwise one document
     * per URL is produced. A URL that fails yields an empty document whose
     * metadata carries the error message, so this method does not reject for
     * individual fetch failures.
     *
     * @param urlOrUrls - A single URL or an array of URLs
     * @returns The documents for all URLs, in input order
     */
    async loadData(urlOrUrls) {
        const urls = Array.isArray(urlOrUrls) ? urlOrUrls : [urlOrUrls];
        const documents = [];
        for (const url of urls) {
            try {
                const peelOpts = {
                    format: this.options.format || 'markdown',
                    render: this.options.render,
                    stealth: this.options.stealth,
                    budget: this.options.budget,
                    proxy: this.options.proxy,
                    proxies: this.options.proxies,
                    // Spread last so explicit peelOptions win over the shortcuts above.
                    ...this.options.peelOptions,
                };
                // Drop undefined entries so they do not shadow peel's own defaults.
                for (const key of Object.keys(peelOpts)) {
                    if (peelOpts[key] === undefined)
                        delete peelOpts[key];
                }
                const result = await peel(url, peelOpts);
                if (this.options.chunk) {
                    const chunkResult = chunkContent(result.content, {
                        maxTokens: this.options.chunkSize || 512,
                        // `??` rather than `||`: an explicit overlap of 0 must be honoured.
                        overlap: this.options.chunkOverlap ?? 50,
                        strategy: 'section',
                    });
                    for (const chunk of chunkResult.chunks) {
                        documents.push({
                            text: chunk.text,
                            id_: `${url}#chunk-${chunk.index}`,
                            metadata: {
                                url,
                                title: result.metadata?.title || '',
                                chunkIndex: chunk.index,
                                totalChunks: chunkResult.totalChunks,
                                section: chunk.section,
                                tokenCount: chunk.tokenCount,
                            },
                        });
                    }
                }
                else {
                    documents.push({
                        text: result.content,
                        id_: url,
                        metadata: {
                            url,
                            title: result.metadata?.title || '',
                            description: result.metadata?.description || '',
                            wordCount: result.metadata?.wordCount || 0,
                            language: result.metadata?.language || '',
                        },
                    });
                }
            }
            catch (error) {
                // Keep the failed URL visible to the caller as an empty document.
                documents.push({
                    text: '',
                    id_: url,
                    metadata: {
                        url,
                        error: error instanceof Error ? error.message : String(error),
                    },
                });
            }
        }
        return documents;
    }
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* handleAct — perform browser actions on a page, then optionally extract content.
|
|
3
|
+
*/
|
|
4
|
+
import { peel } from '../../index.js';
|
|
5
|
+
import { normalizeActions } from '../../core/actions.js';
|
|
6
|
+
import { textResult, safeStringify } from './types.js';
|
|
7
|
+
/**
 * Tool handler: perform browser actions on a page, then optionally return the
 * extracted content.
 *
 * Reads from `args`:
 *   - `url` (required)
 *   - `actions` (required, non-empty array; normalised via `normalizeActions`)
 *   - `extract` (content is included unless this is exactly `false`)
 *   - `screenshot` (coerced to boolean and forwarded to `peel`)
 *
 * Validation failures are returned as a text result containing
 * `{ error: ... }`; they are not thrown. Errors raised by `peel` propagate.
 *
 * @param args - Tool call arguments
 * @param _ctx - Handler context (unused)
 * @returns Text result with the serialised url/title/content/screenshot/method/elapsed
 */
export const handleAct = async (args, _ctx) => {
    const url = args.url;
    const rawActions = args.actions;
    const extract = args.extract !== false; // default true
    const screenshot = Boolean(args.screenshot);
    if (!url)
        return textResult(safeStringify({ error: 'url is required' }));
    // Tool arguments are external input: a string or other non-array value must
    // be rejected here instead of slipping through a bare `.length` check.
    if (!Array.isArray(rawActions) || rawActions.length === 0)
        return textResult(safeStringify({ error: 'actions array is required' }));
    // Normalize actions (handles Firecrawl-style aliases)
    const actions = normalizeActions(rawActions) || [];
    const result = await peel(url, {
        render: true, // actions always require browser
        actions,
        screenshot,
        format: 'markdown',
        budget: 4000,
        timeout: 60000,
    });
    return textResult(safeStringify({
        url: result.url,
        title: result.title,
        content: extract ? result.content : undefined,
        screenshot: result.screenshot,
        method: result.method,
        elapsed: result.elapsed,
    }));
};
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool schema definitions — single source of truth for both transports.
|
|
3
|
+
* Imported by the standalone MCP server and the HTTP MCP route for tools/list.
|
|
4
|
+
*/
|
|
5
|
+
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
|
|
6
|
+
/**
 * Tool schemas exposed for `tools/list`; shared by the standalone MCP server
 * and the HTTP MCP route so both transports advertise the same tools.
 */
export declare const toolDefinitions: Tool[];
|