@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import type { DomainExtractResult } from './types.js';
|
|
2
|
+
/**
 * Wrapper shape used by several listing fields: the actual datum, when
 * present, sits under `value`.
 */
type RedfinValue<T> = {
    value?: T;
};
/**
 * One home as consumed by `formatRedfinListings`. Every field is optional, so
 * consumers must tolerate sparse records.
 */
interface RedfinHome {
    /** Listing price; rendered with a `$` prefix by the formatter. */
    price?: RedfinValue<number>;
    beds?: number;
    baths?: number;
    /** Interior area; rendered with a "sqft" suffix by the formatter. */
    sqFt?: RedfinValue<number>;
    /** Street address line, used as the listing heading. */
    streetLine?: RedfinValue<string>;
    city?: string;
    state?: string;
    zip?: string;
    location?: RedfinValue<string>;
    /** Listing path; the formatter prefixes it with `https://www.redfin.com`. */
    url?: string;
    /** Numeric property-type code (meaning not defined in this file). */
    propertyType?: number;
    yearBuilt?: RedfinValue<number>;
    /** Days on market. */
    dom?: RedfinValue<number>;
    /** MLS status text; the formatter treats a missing status as 'Active'. */
    mlsStatus?: string;
    /** Free-text listing description. */
    listingRemarks?: string;
    /** Badges attached to the listing; the formatter joins their names. */
    sashes?: Array<{
        sashTypeName?: string;
    }>;
    /** Geographic coordinates of the home. */
    latLong?: RedfinValue<{
        latitude?: number;
        longitude?: number;
    }>;
}
|
|
40
|
+
/**
 * The `payload` object that `fetchRedfinListings` resolves to.
 */
interface RedfinApiPayload {
    /** Homes returned for the requested region. */
    homes?: Array<RedfinHome>;
    /**
     * Median figures for the search; every key is optional and numeric.
     * `dom` is days on market.
     */
    searchMedian?: Partial<Record<'price' | 'sqFt' | 'pricePerSqFt' | 'beds' | 'baths' | 'dom', number>>;
}
|
|
51
|
+
/**
 * Requests listings for a Redfin region and resolves to the API payload.
 *
 * Resolves to `null` when the request fails, the HTTP status is >= 400, or the
 * API response reports an error or carries no payload.
 *
 * @param regionId - Redfin region identifier.
 * @param regionType - Redfin region type code.
 * @param numHomes - Number of homes to request; the implementation defaults to 20.
 */
export declare function fetchRedfinListings(regionId: string | number, regionType: number, numHomes?: number): Promise<RedfinApiPayload | null>;
|
|
52
|
+
/**
 * Builds a `DomainExtractResult` for a set of Redfin homes: a Markdown report
 * in `cleanContent` plus the same listings as structured data.
 *
 * At most the first 20 homes are listed.
 *
 * @param homes - Homes to render.
 * @param locationLabel - Human-readable location used in the heading.
 * @param sourceUrl - URL linked in the report footer.
 * @param medianData - Optional search medians; adds a market-summary section.
 */
export declare function formatRedfinListings(homes: RedfinHome[], locationLabel: string, sourceUrl: string, medianData?: RedfinApiPayload['searchMedian']): DomainExtractResult;
|
|
53
|
+
/**
 * Domain extractor for Zillow URLs that sources its data from Redfin instead.
 *
 * The implementation parses the first path segment of `url` as `city-state`
 * and maps it to a Redfin region.
 * NOTE(review): the rest of the implementation is outside this view — confirm
 * the exact conditions under which it resolves to `null`.
 *
 * @param _html - Page HTML; presumably unused (underscore-prefixed) — confirm.
 * @param url - The Zillow page URL.
 */
export declare function zillowExtractor(_html: string, url: string): Promise<DomainExtractResult | null>;
|
|
54
|
+
export {};
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import { simpleFetch } from '../../core/fetcher.js';
|
|
2
|
+
export async function fetchRedfinListings(regionId, regionType, numHomes = 20) {
|
|
3
|
+
try {
|
|
4
|
+
const apiUrl = `https://www.redfin.com/stingray/api/gis?al=1&num_homes=${numHomes}®ion_id=${regionId}®ion_type=${regionType}&sf=1,2,3,5,6,7&status=9&uipt=1,2,3,4,5,6,7,8&v=8`;
|
|
5
|
+
const resp = await simpleFetch(apiUrl, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', 30000, { 'Accept': 'application/json, text/plain, */*', 'Referer': 'https://www.redfin.com/' });
|
|
6
|
+
if (!resp || (resp.statusCode && resp.statusCode >= 400))
|
|
7
|
+
return null;
|
|
8
|
+
// Redfin prepends {}&&
|
|
9
|
+
const raw = resp.html.replace(/^\{\}&&/, '');
|
|
10
|
+
const data = JSON.parse(raw);
|
|
11
|
+
if (data.resultCode !== 0 || !data.payload)
|
|
12
|
+
return null;
|
|
13
|
+
return data.payload;
|
|
14
|
+
}
|
|
15
|
+
catch (e) {
|
|
16
|
+
if (process.env.DEBUG)
|
|
17
|
+
console.debug('[webpeel]', 'Redfin API error:', e instanceof Error ? e.message : e);
|
|
18
|
+
return null;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
/**
 * Render Redfin homes as a Markdown report plus a structured summary.
 *
 * Only the first 20 homes are rendered; `structured.count` still reports how
 * many homes were passed in. Numbers are formatted with the `en-US` locale so
 * the output does not vary with the host's locale settings.
 *
 * @param {Array<object>} homes - Homes to render; a non-array is treated as empty.
 * @param {string} locationLabel - Human-readable location used in the heading.
 * @param {string} sourceUrl - URL linked in the report footer.
 * @param {object} [medianData] - Optional search medians; adds a "Market Summary" section.
 * @returns {{domain: string, type: string, structured: object, cleanContent: string}}
 */
export function formatRedfinListings(homes, locationLabel, sourceUrl, medianData) {
    const MAX_LISTINGS = 20;
    const REMARKS_LIMIT = 200;
    const REDFIN_ORIGIN = 'https://www.redfin.com';
    const fmtNum = (n) => n != null ? n.toLocaleString('en-US') : 'N/A';
    const fmt = (n) => n != null ? `$${n.toLocaleString('en-US')}` : 'N/A';
    // Listing URLs are normally site-relative paths; leave already-absolute ones alone.
    const toAbsoluteUrl = (path) => {
        if (!path)
            return undefined;
        return /^https?:\/\//i.test(path) ? path : `${REDFIN_ORIGIN}${path}`;
    };
    const allHomes = Array.isArray(homes) ? homes : [];
    const shown = allHomes.slice(0, MAX_LISTINGS);
    const lines = [
        `# 🏠 Redfin — ${locationLabel}`,
        '',
        // Report what is actually rendered, not how many homes were passed in.
        `*Live MLS listings via Redfin · ${shown.length} properties shown*`,
        '',
    ];
    if (medianData) {
        lines.push('## 📊 Market Summary');
        lines.push(`- **Median Price:** ${fmt(medianData.price)}`);
        if (medianData.sqFt)
            lines.push(`- **Median Sq Ft:** ${fmtNum(medianData.sqFt)}`);
        if (medianData.pricePerSqFt)
            lines.push(`- **Median $/sqft:** ${fmt(medianData.pricePerSqFt)}`);
        if (medianData.beds)
            lines.push(`- **Median Beds:** ${medianData.beds}`);
        if (medianData.dom)
            lines.push(`- **Median Days on Market:** ${medianData.dom}`);
        lines.push('');
    }
    lines.push('## 🏡 Listings');
    lines.push('');
    for (const home of shown) {
        const addr = home.streetLine?.value || 'Address unknown';
        const cityState = [home.city, home.state, home.zip].filter(Boolean).join(', ');
        const beds = home.beds != null ? `${home.beds}bd` : '';
        const baths = home.baths != null ? `${home.baths}ba` : '';
        const sqft = home.sqFt?.value != null ? `${fmtNum(home.sqFt.value)} sqft` : '';
        const specs = [beds, baths, sqft].filter(Boolean).join(' · ');
        const status = home.mlsStatus || 'Active';
        const dom = home.dom?.value != null ? `${home.dom.value} days on market` : '';
        const badge = home.sashes?.map((s) => s.sashTypeName).filter(Boolean).join(', ') || '';
        const propUrl = toAbsoluteUrl(home.url);
        lines.push(`### ${addr}`);
        if (cityState)
            lines.push(`**${cityState}**`);
        // Join only the parts that exist so a home without specs has no dangling " · ".
        lines.push([`**Price:** ${fmt(home.price?.value)}`, specs].filter(Boolean).join(' · '));
        if (status !== 'Active')
            lines.push(`**Status:** ${status}`);
        if (dom)
            lines.push(`**${dom}**`);
        if (badge)
            lines.push(`*${badge}*`);
        if (typeof home.listingRemarks === 'string' && home.listingRemarks) {
            // Flatten line breaks (including CRLF) so the remark stays one blockquote line.
            const remarks = home.listingRemarks.replace(/\r?\n/g, ' ');
            lines.push('');
            lines.push(`> ${remarks.slice(0, REMARKS_LIMIT)}${remarks.length > REMARKS_LIMIT ? '…' : ''}`);
        }
        if (propUrl)
            lines.push(`[View on Redfin](${propUrl})`);
        lines.push('');
    }
    lines.push('---');
    lines.push(`*Source: [Redfin](${sourceUrl}) · Data from MLS via Redfin internal API*`);
    return {
        domain: 'redfin.com',
        type: 'real-estate-search',
        structured: {
            location: locationLabel,
            count: allHomes.length,
            listings: shown.map((home) => ({
                address: home.streetLine?.value,
                city: home.city,
                state: home.state,
                zip: home.zip,
                price: home.price?.value,
                beds: home.beds,
                baths: home.baths,
                sqFt: home.sqFt?.value,
                yearBuilt: home.yearBuilt?.value,
                daysOnMarket: home.dom?.value,
                status: home.mlsStatus,
                url: toAbsoluteUrl(home.url),
            })),
            median: medianData,
        },
        cleanContent: lines.join('\n'),
    };
}
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
// Zillow extractor → auto-redirects to Redfin API
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
/**
 * Redfin city region IDs (passed to the listings API with region_type=6),
 * keyed by `${STATE}-${City-Slug}` (e.g. `NY-New-York`).
 * Cities missing from this table fall back to a plain redirect notice.
 */
const ZILLOW_REDFIN_CITY_REGION_IDS = {
    'NY-New-York': 30749, 'NY-Brooklyn': 30749, 'NY-Queens': 30749, 'NY-Bronx': 30749,
    'NY-Staten-Island': 30749, 'NY-Manhattan': 30749,
    'CA-Los-Angeles': 11203, 'CA-San-Francisco': 17151, 'CA-San-Diego': 18142,
    'CA-San-Jose': 17420,
    'TX-Houston': 30772, 'TX-Dallas': 35799, 'TX-Austin': 30818,
    'FL-Miami': 10201, 'FL-Orlando': 13140, 'FL-Tampa': 18280,
    'IL-Chicago': 29470, 'WA-Seattle': 16163, 'MA-Boston': 1826,
    'AZ-Phoenix': 14240, 'PA-Philadelphia': 13364, 'GA-Atlanta': 30756,
    'CO-Denver': 11093, 'MN-Minneapolis': 18959, 'OR-Portland': 14941,
    'NV-Las-Vegas': 32820, 'NC-Charlotte': 3105, 'OH-Columbus': 8528,
};

/**
 * Parses a Zillow price query parameter.
 *
 * @param raw - Raw query-string value (may be null or empty).
 * @returns The numeric price, or `undefined` when the parameter is absent or
 *          is not a finite number (so "$NaN" never reaches the output).
 */
function parseZillowPriceParam(raw) {
    if (!raw)
        return undefined;
    const value = Number(raw);
    return Number.isFinite(value) ? value : undefined;
}

/**
 * Zillow extractor. Never uses the Zillow HTML; instead it maps the Zillow URL
 * onto Redfin.
 *
 * Handled URL shapes:
 *   1. `/city-state/...`  → live Redfin listings when the city is in
 *      `ZILLOW_REDFIN_CITY_REGION_IDS` and the API returns homes, otherwise a
 *      redirect notice pointing at the Redfin city page.
 *   2. `/homedetails/ADDRESS/ZPID_zpid/` → redirect notice with a Redfin
 *      address search link.
 *   3. anything else → a "blocked" notice listing alternative portals.
 *
 * @param _html - Ignored (kept for the extractor signature).
 * @param url - The original Zillow URL.
 * @returns An extraction result (`domain`, `type`, `structured`, `cleanContent`),
 *          or `null` if anything throws (e.g. `url` is not a valid URL).
 */
export async function zillowExtractor(_html, url) {
    try {
        const u = new URL(url);
        const pathParts = u.pathname.split('/').filter(Boolean);
        const cityStatePart = pathParts[0] || '';
        // ── Pattern 1: /city-state/ or /city-state/homes/ ──────────────────────
        // e.g. zillow.com/new-york-ny/ → Redfin New York, NY
        const cityStateMatch = cityStatePart.match(/^([a-z][a-z-]*[a-z])-([a-z]{2})$/i);
        if (cityStateMatch) {
            const stateCode = cityStateMatch[2].toUpperCase();
            // Title-case each slug word once; reuse for the label and the URL form.
            const cityWords = cityStateMatch[1]
                .toLowerCase()
                .split('-')
                .map((w) => w.charAt(0).toUpperCase() + w.slice(1));
            const cityName = cityWords.join(' ');
            const cityForUrl = cityWords.join('-');
            // Price filters from the Zillow URL, if present and numeric.
            const priceMax = parseZillowPriceParam(u.searchParams.get('price_max'));
            const priceMin = parseZillowPriceParam(u.searchParams.get('price_min'));
            const redfinCityUrl = `https://www.redfin.com/${stateCode}/${cityForUrl}`;
            const locationLabel = `${cityName}, ${stateCode}`;
            // Try to fetch live Redfin listings when we know the city's region ID.
            const marketId = ZILLOW_REDFIN_CITY_REGION_IDS[`${stateCode}-${cityForUrl}`];
            if (marketId) {
                const payload = await fetchRedfinListings(marketId, 6 /* city */);
                if (payload?.homes && payload.homes.length > 0) {
                    const result = formatRedfinListings(payload.homes, locationLabel, redfinCityUrl, payload.searchMedian);
                    // Swap the Redfin heading for one that notes the Zillow redirect.
                    result.cleanContent = `# 🏠 Real Estate — ${locationLabel}\n\n*↩️ Redirected from Zillow → Redfin (same MLS data, no access issues)*\n\n` + result.cleanContent.replace(/^# 🏠.*\n\n/, '');
                    result.domain = 'zillow.com';
                    result.type = 'redfin-redirect';
                    result.structured = { ...result.structured, originalUrl: url, redirectedTo: redfinCityUrl };
                    return result;
                }
            }
            // Fallback: return redirect info (with neutral wording to avoid false positives).
            // Only use the /city/<id>/ form when the region ID is actually known for
            // this city; otherwise point at the generic city URL instead of guessing.
            const liveListingsUrl = marketId
                ? `https://www.redfin.com/city/${marketId}/${stateCode}/${cityForUrl}`
                : redfinCityUrl;
            // Optional lines are spliced in conditionally so the intentional blank
            // separator lines survive (a blanket filter(Boolean) would drop them and
            // turn the text above '---' into a setext heading).
            const lines = [
                `# 🏠 Real Estate — ${locationLabel}`,
                '',
                `*This URL was fetched via Redfin instead — same MLS data, better access.*`,
                '',
                `**Location:** ${locationLabel}`,
                ...(priceMax !== undefined ? [`**Max Price:** $${priceMax.toLocaleString()}`] : []),
                ...(priceMin !== undefined ? [`**Min Price:** $${priceMin.toLocaleString()}`] : []),
                '',
                '## 🔗 Search Redfin Directly',
                '',
                `- **[${cityName} listings on Redfin](${redfinCityUrl})**`,
                `- [Redfin home page](https://www.redfin.com)`,
                '',
                '### How to get live listings:',
                '```',
                `webpeel "${liveListingsUrl}"`,
                '```',
                '',
                '*MLS data sourced from Redfin — covers the same properties as competing real estate portals.*',
                '',
                '---',
                `*Original URL: [View](${url})*`,
            ];
            return {
                domain: 'zillow.com',
                type: 'redirect-to-redfin',
                structured: {
                    originalUrl: url,
                    redirectUrl: redfinCityUrl,
                    city: cityName,
                    state: stateCode,
                    priceMax,
                    priceMin,
                },
                cleanContent: lines.join('\n'),
            };
        }
        // ── Pattern 2: /homedetails/ADDRESS/ZPID_zpid/ ──────────────────────────
        const detailMatch = u.pathname.match(/homedetails\/(.+?)\/(\d+)_zpid/);
        if (detailMatch) {
            // Convert slug to readable address: "123-Main-St-New-York-NY-10001" → "123 Main St New York NY 10001"
            const addressReadable = detailMatch[1].replace(/-/g, ' ');
            const redfinSearchUrl = `https://www.redfin.com/search#query=${encodeURIComponent(addressReadable)}`;
            const cleanContent = [
                `# 🏠 Property — ${addressReadable}`,
                '',
                `*Redirected from Zillow to Redfin — same MLS data, better access.*`,
                '',
                `**Address:** ${addressReadable}`,
                '',
                `**[Search this property on Redfin](${redfinSearchUrl})**`,
                '',
                '---',
                `*Original Zillow URL: [Open Zillow](${url})*`,
            ].join('\n');
            return {
                domain: 'zillow.com',
                type: 'redirect-to-redfin',
                structured: {
                    originalUrl: url,
                    redirectUrl: redfinSearchUrl,
                    address: addressReadable,
                    zpid: detailMatch[2],
                },
                cleanContent,
            };
        }
        // ── Fallback ────────────────────────────────────────────────────────────
        const cleanContent = [
            '# 🏠 Zillow — Real Estate Search',
            '',
            '> ⚠️ Zillow restricts automated access. Use Redfin for the same MLS data.',
            '',
            '**Better alternatives (same MLS data):**',
            '- [Redfin](https://www.redfin.com) — scrape-friendly, live MLS listings',
            '- [Realtor.com](https://www.realtor.com) — MLS-powered',
            '- [Homes.com](https://www.homes.com) — newer platform',
            '',
            `**Original URL:** [Zillow](${url})`,
        ].join('\n');
        return {
            domain: 'zillow.com',
            type: 'blocked',
            structured: { originalUrl: url, blocked: true },
            cleanContent,
        };
    }
    catch (e) {
        // Best-effort extractor: report only in debug mode and signal "no result".
        if (process.env.DEBUG)
            console.debug('[webpeel]', 'Zillow extractor error:', e instanceof Error ? e.message : e);
        return null;
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { extractDomainData, getDomainExtractor } from './domain-extractors.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { extractDomainData, getDomainExtractor } from './domain-extractors.js';
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Premium strategy hooks — server-only optimisations.
|
|
3
|
+
*
|
|
4
|
+
* Call `registerPremiumHooks()` once at server startup to activate:
|
|
5
|
+
* • SWR (stale-while-revalidate) response cache
|
|
6
|
+
* • Domain intelligence (learns which sites need browser/stealth)
|
|
7
|
+
* • Parallel race strategy (starts browser if simple fetch is slow)
|
|
8
|
+
* • 55+ domain extractors (Twitter, Reddit, GitHub, HN, Wikipedia, etc.)
|
|
9
|
+
* • SPA auto-detection (travel, jobs, real estate sites)
|
|
10
|
+
* • Content stability detection (smart DOM mutation monitoring)
|
|
11
|
+
*
|
|
12
|
+
* These modules are NOT shipped in the npm package.
|
|
13
|
+
*/
|
|
14
|
+
export { clearDomainIntel } from './domain-intel.js';
|
|
15
|
+
/**
|
|
16
|
+
* Wire all premium hooks into the core strategy layer.
|
|
17
|
+
*
|
|
18
|
+
* Must be called before any request is served.
|
|
19
|
+
*/
|
|
20
|
+
export declare function registerPremiumHooks(): void;
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Premium strategy hooks — server-only optimisations.
|
|
3
|
+
*
|
|
4
|
+
* Call `registerPremiumHooks()` once at server startup to activate:
|
|
5
|
+
* • SWR (stale-while-revalidate) response cache
|
|
6
|
+
* • Domain intelligence (learns which sites need browser/stealth)
|
|
7
|
+
* • Parallel race strategy (starts browser if simple fetch is slow)
|
|
8
|
+
* • 55+ domain extractors (Twitter, Reddit, GitHub, HN, Wikipedia, etc.)
|
|
9
|
+
* • SPA auto-detection (travel, jobs, real estate sites)
|
|
10
|
+
* • Content stability detection (smart DOM mutation monitoring)
|
|
11
|
+
*
|
|
12
|
+
* These modules are NOT shipped in the npm package.
|
|
13
|
+
*/
|
|
14
|
+
import { registerStrategyHooks } from '../core/strategy-hooks.js';
|
|
15
|
+
import { createSWRCacheHooks } from './swr-cache.js';
|
|
16
|
+
import { createDomainIntelHooks } from './domain-intel.js';
|
|
17
|
+
import { extractDomainData, getDomainExtractor } from './domain-extractors.js';
|
|
18
|
+
import { SPA_DOMAINS, SPA_URL_PATTERNS } from './spa-detection.js';
|
|
19
|
+
import { waitForContentStable } from './stability.js';
|
|
20
|
+
export { clearDomainIntel } from './domain-intel.js';
|
|
21
|
+
/**
 * Registers the full premium hook set with the core strategy layer.
 *
 * Builds the SWR-cache and domain-intelligence hook objects, combines them
 * with the race settings, domain extractors, SPA tables and the content
 * stability waiter, and hands the result to `registerStrategyHooks` in a
 * single call.
 *
 * Must be called before any request is served.
 */
export function registerPremiumHooks() {
    const swr = createSWRCacheHooks();
    const intel = createDomainIntelHooks();
    // SWR cache
    const cachePart = {
        checkCache: swr.checkCache,
        markRevalidating: swr.markRevalidating,
        setCache: swr.setCache,
    };
    // Domain intelligence
    const intelPart = {
        getDomainRecommendation: intel.getDomainRecommendation,
        recordDomainResult: intel.recordDomainResult,
    };
    // Parallel race strategy: always race, with a fixed 2000 ms timeout value.
    const racePart = {
        shouldRace: () => true,
        getRaceTimeoutMs: () => 2000,
    };
    // Premium domain extraction (55+ extractors) and extractor lookup
    const extractorPart = {
        extractDomainData,
        getDomainExtractor: (url) => getDomainExtractor(url),
    };
    // Premium SPA detection
    const spaPart = {
        getSPADomains: () => SPA_DOMAINS,
        getSPAPatterns: () => SPA_URL_PATTERNS,
    };
    const hooks = {
        ...cachePart,
        ...intelPart,
        ...racePart,
        ...extractorPart,
        ...spaPart,
        // Premium content stability (DOM mutation monitoring)
        waitForContentStable,
    };
    registerStrategyHooks(hooks);
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
/** Exact hostnames listed as SPA domains (travel, jobs, real-estate sites and the WebPeel app). */
export const SPA_DOMAINS = new Set([
    // Google
    'www.google.com',
    'flights.google.com',
    // Travel
    'www.airbnb.com',
    'www.booking.com',
    'www.expedia.com',
    'www.kayak.com',
    'www.skyscanner.com',
    'www.tripadvisor.com',
    // Jobs
    'www.indeed.com',
    'www.glassdoor.com',
    // Real estate
    'www.zillow.com',
    // WebPeel app
    'app.webpeel.dev',
]);
|
|
2
|
+
/** URL patterns listed as SPA pages: the Google travel, maps and shopping sections. */
export const SPA_URL_PATTERNS = ['travel', 'maps', 'shopping'].map((section) => new RegExp(`google\\.com\\/${section}`));
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Waits until the page's DOM has stopped mutating.
 *
 * Runs inside the page via `page.evaluate`: a MutationObserver records the
 * time of the latest mutation, and the promise resolves as soon as no
 * mutation has been seen for `quietMs`, or when `timeoutMs` has elapsed,
 * whichever comes first.
 *
 * @param page - Object exposing `evaluate(fn, arg)` (a browser page handle).
 * @param options - Optional `{ timeoutMs, quietMs }`; defaults are 5000 and 500.
 * @returns A promise that resolves (with no value) once the content is
 *          considered stable or the timeout is reached.
 */
export async function waitForContentStable(page, options) {
    const timeout = options?.timeoutMs ?? 5000;
    const quiet = options?.quietMs ?? 500;
    // NOTE: the callback is evaluated in the page, so it must stay self-contained
    // (no references to variables from this module).
    await page.evaluate(({ quietMs, timeoutMs }) => {
        return new Promise((resolve) => {
            const startedAt = Date.now();
            let lastMutation = startedAt;
            let finished = false;
            let hardTimer;
            let quietTimer;
            const observer = new MutationObserver(() => { lastMutation = Date.now(); });
            // Single exit path: tear everything down exactly once.
            const finish = () => {
                if (finished)
                    return;
                finished = true;
                observer.disconnect();
                clearTimeout(hardTimer);
                clearTimeout(quietTimer);
                resolve();
            };
            const check = () => {
                if (finished)
                    return;
                const now = Date.now();
                // Quiet long enough, or overall budget (measured from the start) spent.
                if (now - lastMutation >= quietMs || now - startedAt >= timeoutMs) {
                    finish();
                    return;
                }
                requestAnimationFrame(check);
            };
            // document.body can be null (e.g. very early in load); fall back to
            // the root element or the document so observe() does not throw.
            const root = document.body ?? document.documentElement ?? document;
            observer.observe(root, { childList: true, subtree: true, characterData: true });
            hardTimer = setTimeout(finish, timeoutMs);
            quietTimer = setTimeout(check, quietMs);
        });
    }, { quietMs: quiet, timeoutMs: Math.max(0, timeout) });
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stale-While-Revalidate cache — premium server-only optimisation.
|
|
3
|
+
*
|
|
4
|
+
* Wraps the core LRU cache with SWR semantics:
|
|
5
|
+
* • Fresh entries are served immediately.
|
|
6
|
+
* • Stale entries (within the SWR window) are served AND trigger a
|
|
7
|
+
* background revalidation so the next caller gets a fresh result.
|
|
8
|
+
* • Expired entries (past the SWR window) are evicted.
|
|
9
|
+
*
|
|
10
|
+
* This module is NOT shipped in the npm package — it lives under
|
|
11
|
+
* `src/server/` which is excluded from the package.json `files` list.
|
|
12
|
+
*/
|
|
13
|
+
import type { StrategyHooks } from '../core/strategy-hooks.js';
|
|
14
|
+
export declare function createSWRCacheHooks(): Pick<StrategyHooks, 'checkCache' | 'markRevalidating' | 'setCache'>;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stale-While-Revalidate cache — premium server-only optimisation.
|
|
3
|
+
*
|
|
4
|
+
* Wraps the core LRU cache with SWR semantics:
|
|
5
|
+
* • Fresh entries are served immediately.
|
|
6
|
+
* • Stale entries (within the SWR window) are served AND trigger a
|
|
7
|
+
* background revalidation so the next caller gets a fresh result.
|
|
8
|
+
* • Expired entries (past the SWR window) are evicted.
|
|
9
|
+
*
|
|
10
|
+
* This module is NOT shipped in the npm package — it lives under
|
|
11
|
+
* `src/server/` which is excluded from the package.json `files` list.
|
|
12
|
+
*/
|
|
13
|
+
import { getCachedWithSWR, markRevalidating, setCached, } from '../core/cache.js';
|
|
14
|
+
/* ---------- hook implementations ---------------------------------------- */
|
|
15
|
+
/**
 * Cache lookup hook: asks `getCachedWithSWR` for `url` and returns `null`
 * when it yields nothing, otherwise a fresh `{ value, stale }` object copied
 * from the entry.
 */
function checkCache(url) {
    const hit = getCachedWithSWR(url);
    return hit ? { value: hit.value, stale: hit.stale } : null;
}
|
|
21
|
+
/**
 * Revalidation hook: forwards `url` to the core `markRevalidating` and
 * returns whatever it returns.
 */
function markRevalidatingHook(url) {
    const outcome = markRevalidating(url);
    return outcome;
}
|
|
24
|
+
/**
 * Cache write hook: stores `result` under `url` via the core `setCached`.
 * Any value `setCached` returns is discarded.
 */
function setCache(url, result) {
    void setCached(url, result);
}
|
|
27
|
+
/* ---------- public export ----------------------------------------------- */
|
|
28
|
+
/**
 * Builds the SWR cache hook bundle.
 *
 * @returns An object with `checkCache`, `markRevalidating` and `setCache`
 *          bound to this module's hook implementations.
 */
export function createSWRCacheHooks() {
    const hooks = {
        checkCache: checkCache,
        markRevalidating: markRevalidatingHook,
        setCache: setCache,
    };
    return hooks;
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WebPeel - Fast web fetcher for AI agents
|
|
3
|
+
*
|
|
4
|
+
* Main library export
|
|
5
|
+
*/
|
|
6
|
+
import { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser } from './core/fetcher.js';
|
|
7
|
+
import type { PeelOptions, PeelResult } from './types.js';
|
|
8
|
+
export * from './types.js';
|
|
9
|
+
export { WebPeelError as TypedWebPeelError, Errors, isRetryable, type ErrorCode, } from './errors.js';
|
|
10
|
+
export { withRetry, DomainRateLimiter, domainLimiter, type RetryOptions } from './core/retry.js';
|
|
11
|
+
export { getDomainExtractor, extractDomainData, type DomainExtractResult, type DomainExtractor } from './ee/domain-extractors.js';
|
|
12
|
+
export { crawl, type CrawlOptions, type CrawlResult, type CrawlProgress } from './core/crawler.js';
|
|
13
|
+
export { discoverSitemap, type SitemapUrl, type SitemapResult } from './core/sitemap.js';
|
|
14
|
+
export { mapDomain, type MapOptions, type MapResult } from './core/map.js';
|
|
15
|
+
export { extractBranding, type BrandingProfile } from './core/branding.js';
|
|
16
|
+
export { trackChange, getSnapshot, clearSnapshots, type ChangeResult, type Snapshot } from './core/change-tracking.js';
|
|
17
|
+
export { extractWithLLM } from './core/extract.js';
|
|
18
|
+
export { extractDocumentToFormat, isPdfContentType, isDocxContentType, type DocumentExtractionResult } from './core/documents.js';
|
|
19
|
+
export { extractInlineJson, type InlineExtractOptions, type InlineExtractResult } from './core/extract-inline.js';
|
|
20
|
+
export { runAgent, type AgentOptions, type AgentResult, type AgentProgress, type AgentStreamEvent, type AgentDepth, type AgentTopic } from './core/agent.js';
|
|
21
|
+
export { summarizeContent, type SummarizeOptions } from './core/summarize.js';
|
|
22
|
+
export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, providerStats, type SearchProvider, type SearchProviderId, type WebSearchResult, type WebSearchOptions, } from './core/search-provider.js';
|
|
23
|
+
export { BaiduSearchProvider, YandexSearchProvider, NaverSearchProvider, YahooJapanSearchProvider } from './core/search-engines.js';
|
|
24
|
+
export { crossVerifySearch, type CrossVerifyResult } from './core/cross-verify.js';
|
|
25
|
+
export { answerQuestion, type AnswerRequest, type AnswerResponse, type AnswerCitation, type LLMProviderId, type TokensUsed, } from './core/answer.js';
|
|
26
|
+
export { parseGoogleSerp, type GoogleSerpResult } from './core/google-serp-parser.js';
|
|
27
|
+
export { searchJobs, type JobCard, type JobDetail, type JobSearchOptions, type JobSearchResult } from './core/jobs.js';
|
|
28
|
+
export { RateGovernor, formatDuration, type RateConfig, type RateState, type CanApplyResult, } from './core/rate-governor.js';
|
|
29
|
+
export { ApplicationTracker, type ApplicationRecord, type ApplicationFilter, type ApplicationStats, type ApplicationStatus, } from './core/application-tracker.js';
|
|
30
|
+
export { applyToJob, loadApplications, saveApplication, getApplicationsToday, updateApplicationStatus, type ApplyProfile, type ApplyOptions, type ApplyProgressEvent, type DetectedField, type ApplyResult, type ApplicationRecord as ApplyApplicationRecord, } from './core/apply.js';
|
|
31
|
+
export { extractListings, type ListingItem } from './core/extract-listings.js';
|
|
32
|
+
export { parseYouTubeUrl, extractVideoInfo, extractPlayerResponse, parseCaptionXml, decodeHtmlEntities, getYouTubeTranscript, type TranscriptSegment, type YouTubeTranscript, type YouTubeVideoInfo, } from './core/youtube.js';
|
|
33
|
+
export { formatTable } from './core/table-format.js';
|
|
34
|
+
export { findNextPageUrl } from './core/paginate.js';
|
|
35
|
+
export { distillToBudget, budgetListings, TOKENS_PER_LISTING_ITEM } from './core/budget.js';
|
|
36
|
+
export { watch, parseDuration, parseAssertion, type WatchOptions, type Assertion, type WatchCheckResult, type AssertionResult, } from './core/watch.js';
|
|
37
|
+
export { observe, type ObserveOptions, type ObserveResult, type ObservedElement, } from './core/observe.js';
|
|
38
|
+
export { diffUrl, type DiffOptions, type DiffResult, type DiffChange, } from './core/diff.js';
|
|
39
|
+
export { extractReadableContent, type ReadabilityResult, type ReadabilityOptions } from './core/readability.js';
|
|
40
|
+
export { quickAnswer, type QuickAnswerOptions, type QuickAnswerResult } from './core/quick-answer.js';
|
|
41
|
+
export { extractValueFromPassage, smartExtractSchemaFields } from './core/schema-postprocess.js';
|
|
42
|
+
export { Timer, type PipelineTiming } from './core/timing.js';
|
|
43
|
+
export { chunkContent, type ChunkOptions, type ContentChunk, type ChunkResult } from './core/chunker.js';
|
|
44
|
+
export type SearchFallbackResult = {
|
|
45
|
+
content: string;
|
|
46
|
+
url: string;
|
|
47
|
+
method: string;
|
|
48
|
+
};
|
|
49
|
+
export declare function searchFallback(..._args: any[]): Promise<SearchFallbackResult | null>;
|
|
50
|
+
export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS, type PeelTLSOptions, type PeelTLSResult } from './core/peel-tls.js';
|
|
51
|
+
export { sanitizeForLLM, type SanitizeResult } from './core/prompt-guard.js';
|
|
52
|
+
export { getSourceCredibility, type SourceCredibility } from './core/source-credibility.js';
|
|
53
|
+
export { verifyDomain, type DomainVerification } from './core/domain-verify.js';
|
|
54
|
+
export { checkUrlSafety, type SafeBrowsingResult } from './core/safe-browsing.js';
|
|
55
|
+
export { checkThreatFeeds, type ThreatFeedResult } from './core/threat-feeds.js';
|
|
56
|
+
export { detectLanguageFromUrl, buildAcceptLanguageHeader } from './core/language-detect.js';
|
|
57
|
+
export { localSearch, type LocalSearchOptions, type LocalSearchResult, type LocalSearchResponse } from './core/local-search.js';
|
|
58
|
+
export { getBusinessIntel, type BusinessIntel } from './core/business-intel.js';
|
|
59
|
+
export { CircuitBreaker, browserCircuitBreaker, type CircuitState } from './core/circuit-breaker.js';
|
|
60
|
+
export { checkMemoryPressure } from './core/browser-pool.js';
|
|
61
|
+
export { searchShopping, searchNews, searchImages, searchVideos } from './core/vertical-search.js';
|
|
62
|
+
export type { ShoppingResult, NewsResult, ImageResult, VideoResult, VerticalSearchOptions } from './core/vertical-search.js';
|
|
63
|
+
/**
|
|
64
|
+
* Fetch and extract content from a URL
|
|
65
|
+
*
|
|
66
|
+
* @param url - URL to fetch
|
|
67
|
+
* @param options - Fetch options
|
|
68
|
+
* @returns Extracted content and metadata
|
|
69
|
+
*
|
|
70
|
+
* @example
|
|
71
|
+
* ```typescript
|
|
72
|
+
* import { peel } from 'webpeel';
|
|
73
|
+
*
|
|
74
|
+
* const result = await peel('https://example.com');
|
|
75
|
+
* console.log(result.content); // Markdown content
|
|
76
|
+
* console.log(result.metadata); // Structured metadata
|
|
77
|
+
* ```
|
|
78
|
+
*/
|
|
79
|
+
export declare function peel(url: string, options?: PeelOptions): Promise<PeelResult>;
|
|
80
|
+
/**
|
|
81
|
+
* Fetch multiple URLs in batch with concurrency control
|
|
82
|
+
*
|
|
83
|
+
* @param urls - Array of URLs to fetch
|
|
84
|
+
* @param options - Fetch options (including concurrency)
|
|
85
|
+
* @returns Array of results or errors
|
|
86
|
+
*
|
|
87
|
+
* @example
|
|
88
|
+
* ```typescript
|
|
89
|
+
* import { peelBatch } from 'webpeel';
|
|
90
|
+
*
|
|
91
|
+
* const urls = ['https://example.com', 'https://example.org'];
|
|
92
|
+
* const results = await peelBatch(urls, { concurrency: 3 });
|
|
93
|
+
* ```
|
|
94
|
+
*/
|
|
95
|
+
export declare function peelBatch(urls: string[], options?: PeelOptions & {
|
|
96
|
+
concurrency?: number;
|
|
97
|
+
onProgress?: (completed: number, total: number) => void;
|
|
98
|
+
}): Promise<(PeelResult | {
|
|
99
|
+
url: string;
|
|
100
|
+
error: string;
|
|
101
|
+
})[]>;
|
|
102
|
+
/**
|
|
103
|
+
* Clean up any browser resources
|
|
104
|
+
* Call this when you're done using WebPeel
|
|
105
|
+
*/
|
|
106
|
+
export { cleanup, warmup, closePool, scrollAndWait, closeProfileBrowser };
|
|
107
|
+
export { getCached, setCached, clearCache, setCacheTTL } from './core/cache.js';
|
|
108
|
+
export { getRealisticUserAgent, getRandomUA, REALISTIC_USER_AGENTS, } from './core/user-agents.js';
|
|
109
|
+
export { humanDelay, humanMouseMove, humanRead, warmupBrowse, humanType, humanClearAndType, humanClick, humanScroll, humanScrollToElement, warmupSession, humanSelect, humanUploadFile, humanToggle, type HumanConfig, } from './core/human.js';
|
|
110
|
+
export { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates, type SchemaTemplate } from './core/schema-templates.js';
|
|
111
|
+
export { WebPeelLoader, type WebPeelLoaderOptions } from './integrations/langchain.js';
|
|
112
|
+
export { WebPeelReader, type WebPeelReaderOptions } from './integrations/llamaindex.js';
|
|
113
|
+
export declare function applyStealthPatches(page: any): Promise<void>;
|
|
114
|
+
export declare function applyAcceptLanguageHeader(page: any, lang?: string): Promise<void>;
|
|
115
|
+
export { fetchGoogleCache, isGoogleCacheAvailable, type GoogleCacheResult } from './core/google-cache.js';
|
|
116
|
+
export { cfWorkerFetch, isCfWorkerAvailable, type CfWorkerProxyOptions, type CfWorkerProxyResult } from './core/cf-worker-proxy.js';
|
|
117
|
+
/**
|
|
118
|
+
* WebPeel client class — alternative OOP interface over the functional API.
|
|
119
|
+
* Provides the same capabilities as the standalone functions but with
|
|
120
|
+
* a configured client instance.
|
|
121
|
+
*
|
|
122
|
+
* @example
|
|
123
|
+
* import { WebPeel } from 'webpeel';
|
|
124
|
+
* const wp = new WebPeel({ apiKey: process.env.WEBPEEL_API_KEY });
|
|
125
|
+
* const result = await wp.fetch('https://stripe.com');
|
|
126
|
+
*/
|
|
127
|
+
export declare class WebPeel {
|
|
128
|
+
private readonly apiKey;
|
|
129
|
+
constructor(config: {
|
|
130
|
+
apiKey: string;
|
|
131
|
+
apiUrl?: string;
|
|
132
|
+
});
|
|
133
|
+
/** Fetch and extract content from a URL */
|
|
134
|
+
fetch(url: string, options?: PeelOptions): Promise<PeelResult>;
|
|
135
|
+
/** Search the web */
|
|
136
|
+
search(query: string, options?: Record<string, unknown>): Promise<unknown>;
|
|
137
|
+
/** Crawl a site */
|
|
138
|
+
crawl(startUrl: string, options?: Record<string, unknown>): Promise<unknown>;
|
|
139
|
+
/** Map a domain's pages */
|
|
140
|
+
map(url: string, options?: Record<string, unknown>): Promise<unknown>;
|
|
141
|
+
/** Extract structured data */
|
|
142
|
+
extract(url: string, _schema: Record<string, unknown>, options?: PeelOptions): Promise<unknown>;
|
|
143
|
+
}
|