@iflow-mcp/jakeliume-webpeel 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +313 -0
- package/dist/cache.d.ts +30 -0
- package/dist/cache.js +139 -0
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +411 -0
- package/dist/cli/commands/doctor.d.ts +37 -0
- package/dist/cli/commands/doctor.js +371 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1345 -0
- package/dist/cli/commands/guide.d.ts +2 -0
- package/dist/cli/commands/guide.js +183 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +840 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/monitor.d.ts +12 -0
- package/dist/cli/commands/monitor.js +197 -0
- package/dist/cli/commands/observe.d.ts +12 -0
- package/dist/cli/commands/observe.js +158 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +282 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +1021 -0
- package/dist/cli/commands/setup.d.ts +13 -0
- package/dist/cli/commands/setup.js +244 -0
- package/dist/cli/commands/skill.d.ts +15 -0
- package/dist/cli/commands/skill.js +195 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +806 -0
- package/dist/cli-auth.d.ts +75 -0
- package/dist/cli-auth.js +369 -0
- package/dist/cli.d.ts +17 -0
- package/dist/cli.js +99 -0
- package/dist/core/actions.d.ts +69 -0
- package/dist/core/actions.js +495 -0
- package/dist/core/agent.d.ts +98 -0
- package/dist/core/agent.js +558 -0
- package/dist/core/answer.d.ts +42 -0
- package/dist/core/answer.js +395 -0
- package/dist/core/application-tracker.d.ts +84 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/apply.d.ts +162 -0
- package/dist/core/apply.js +816 -0
- package/dist/core/auth-detection.d.ts +35 -0
- package/dist/core/auth-detection.js +358 -0
- package/dist/core/auto-extract.d.ts +82 -0
- package/dist/core/auto-extract.js +604 -0
- package/dist/core/auto-interact.d.ts +23 -0
- package/dist/core/auto-interact.js +246 -0
- package/dist/core/bm25-filter.d.ts +66 -0
- package/dist/core/bm25-filter.js +288 -0
- package/dist/core/branding.d.ts +54 -0
- package/dist/core/branding.js +234 -0
- package/dist/core/browser-fetch.d.ts +323 -0
- package/dist/core/browser-fetch.js +1600 -0
- package/dist/core/browser-pool.d.ts +91 -0
- package/dist/core/browser-pool.js +550 -0
- package/dist/core/budget.d.ts +42 -0
- package/dist/core/budget.js +324 -0
- package/dist/core/business-intel.d.ts +47 -0
- package/dist/core/business-intel.js +279 -0
- package/dist/core/cache.d.ts +13 -0
- package/dist/core/cache.js +121 -0
- package/dist/core/cf-worker-proxy.d.ts +32 -0
- package/dist/core/cf-worker-proxy.js +87 -0
- package/dist/core/challenge-detection.d.ts +26 -0
- package/dist/core/challenge-detection.js +468 -0
- package/dist/core/change-tracking.d.ts +75 -0
- package/dist/core/change-tracking.js +276 -0
- package/dist/core/chunker.d.ts +46 -0
- package/dist/core/chunker.js +249 -0
- package/dist/core/chunking.d.ts +42 -0
- package/dist/core/chunking.js +181 -0
- package/dist/core/circuit-breaker.d.ts +44 -0
- package/dist/core/circuit-breaker.js +85 -0
- package/dist/core/content-pruner.d.ts +47 -0
- package/dist/core/content-pruner.js +425 -0
- package/dist/core/cookie-cache.d.ts +60 -0
- package/dist/core/cookie-cache.js +163 -0
- package/dist/core/crawl-checkpoint.d.ts +54 -0
- package/dist/core/crawl-checkpoint.js +104 -0
- package/dist/core/crawler.d.ts +84 -0
- package/dist/core/crawler.js +349 -0
- package/dist/core/cross-verify.d.ts +27 -0
- package/dist/core/cross-verify.js +93 -0
- package/dist/core/deep-fetch.d.ts +74 -0
- package/dist/core/deep-fetch.js +405 -0
- package/dist/core/deep-research.d.ts +141 -0
- package/dist/core/deep-research.js +972 -0
- package/dist/core/design-analysis.d.ts +70 -0
- package/dist/core/design-analysis.js +490 -0
- package/dist/core/design-compare.d.ts +38 -0
- package/dist/core/design-compare.js +264 -0
- package/dist/core/diff.d.ts +61 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/dns-cache.d.ts +20 -0
- package/dist/core/dns-cache.js +198 -0
- package/dist/core/documents.d.ts +23 -0
- package/dist/core/documents.js +123 -0
- package/dist/core/domain-memory.d.ts +66 -0
- package/dist/core/domain-memory.js +163 -0
- package/dist/core/domain-verify.d.ts +40 -0
- package/dist/core/domain-verify.js +379 -0
- package/dist/core/engine-ranker.d.ts +112 -0
- package/dist/core/engine-ranker.js +395 -0
- package/dist/core/extract-inline.d.ts +38 -0
- package/dist/core/extract-inline.js +215 -0
- package/dist/core/extract-listings.d.ts +38 -0
- package/dist/core/extract-listings.js +461 -0
- package/dist/core/extract.d.ts +9 -0
- package/dist/core/extract.js +139 -0
- package/dist/core/fetch-cache.d.ts +57 -0
- package/dist/core/fetch-cache.js +95 -0
- package/dist/core/fetcher.d.ts +13 -0
- package/dist/core/fetcher.js +12 -0
- package/dist/core/google-cache.d.ts +29 -0
- package/dist/core/google-cache.js +180 -0
- package/dist/core/google-serp-parser.d.ts +82 -0
- package/dist/core/google-serp-parser.js +287 -0
- package/dist/core/hotel-search.d.ts +122 -0
- package/dist/core/hotel-search.js +382 -0
- package/dist/core/http-fetch.d.ts +72 -0
- package/dist/core/http-fetch.js +820 -0
- package/dist/core/human.d.ts +175 -0
- package/dist/core/human.js +680 -0
- package/dist/core/image-caption.d.ts +44 -0
- package/dist/core/image-caption.js +271 -0
- package/dist/core/jobs.d.ts +75 -0
- package/dist/core/jobs.js +634 -0
- package/dist/core/json-ld.d.ts +15 -0
- package/dist/core/json-ld.js +617 -0
- package/dist/core/language-detect.d.ts +18 -0
- package/dist/core/language-detect.js +135 -0
- package/dist/core/links.d.ts +10 -0
- package/dist/core/links.js +44 -0
- package/dist/core/llm-extract.d.ts +71 -0
- package/dist/core/llm-extract.js +507 -0
- package/dist/core/llm-provider.d.ts +100 -0
- package/dist/core/llm-provider.js +702 -0
- package/dist/core/local-search.d.ts +60 -0
- package/dist/core/local-search.js +308 -0
- package/dist/core/logger.d.ts +28 -0
- package/dist/core/logger.js +104 -0
- package/dist/core/map.d.ts +33 -0
- package/dist/core/map.js +127 -0
- package/dist/core/markdown.d.ts +92 -0
- package/dist/core/markdown.js +809 -0
- package/dist/core/metadata.d.ts +34 -0
- package/dist/core/metadata.js +422 -0
- package/dist/core/observe.d.ts +113 -0
- package/dist/core/observe.js +395 -0
- package/dist/core/ocr.d.ts +12 -0
- package/dist/core/ocr.js +33 -0
- package/dist/core/paginate.d.ts +31 -0
- package/dist/core/paginate.js +106 -0
- package/dist/core/pdf.d.ts +8 -0
- package/dist/core/pdf.js +25 -0
- package/dist/core/peel-tls.d.ts +25 -0
- package/dist/core/peel-tls.js +220 -0
- package/dist/core/pipeline.d.ts +132 -0
- package/dist/core/pipeline.js +1666 -0
- package/dist/core/profiles.d.ts +61 -0
- package/dist/core/profiles.js +350 -0
- package/dist/core/prompt-guard.d.ts +30 -0
- package/dist/core/prompt-guard.js +119 -0
- package/dist/core/proxy-config.d.ts +90 -0
- package/dist/core/proxy-config.js +172 -0
- package/dist/core/quick-answer.d.ts +53 -0
- package/dist/core/quick-answer.js +833 -0
- package/dist/core/rate-governor.d.ts +80 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/readability.d.ts +57 -0
- package/dist/core/readability.js +533 -0
- package/dist/core/research.d.ts +66 -0
- package/dist/core/research.js +270 -0
- package/dist/core/retry.d.ts +60 -0
- package/dist/core/retry.js +119 -0
- package/dist/core/safe-browsing.d.ts +30 -0
- package/dist/core/safe-browsing.js +206 -0
- package/dist/core/schema-extraction.d.ts +66 -0
- package/dist/core/schema-extraction.js +352 -0
- package/dist/core/schema-postprocess.d.ts +32 -0
- package/dist/core/schema-postprocess.js +469 -0
- package/dist/core/schema-templates.d.ts +19 -0
- package/dist/core/schema-templates.js +143 -0
- package/dist/core/screenshot.d.ts +224 -0
- package/dist/core/screenshot.js +207 -0
- package/dist/core/search-engines.d.ts +25 -0
- package/dist/core/search-engines.js +182 -0
- package/dist/core/search-provider.d.ts +243 -0
- package/dist/core/search-provider.js +1629 -0
- package/dist/core/searxng-provider.d.ts +35 -0
- package/dist/core/searxng-provider.js +105 -0
- package/dist/core/selective-evidence.d.ts +151 -0
- package/dist/core/selective-evidence.js +389 -0
- package/dist/core/site-search.d.ts +44 -0
- package/dist/core/site-search.js +252 -0
- package/dist/core/sitemap.d.ts +23 -0
- package/dist/core/sitemap.js +105 -0
- package/dist/core/source-credibility.d.ts +29 -0
- package/dist/core/source-credibility.js +584 -0
- package/dist/core/source-scoring.d.ts +166 -0
- package/dist/core/source-scoring.js +396 -0
- package/dist/core/stemmer.d.ts +38 -0
- package/dist/core/stemmer.js +509 -0
- package/dist/core/strategies.d.ts +104 -0
- package/dist/core/strategies.js +1044 -0
- package/dist/core/strategy-hooks.d.ts +145 -0
- package/dist/core/strategy-hooks.js +74 -0
- package/dist/core/structured-extract.d.ts +43 -0
- package/dist/core/structured-extract.js +550 -0
- package/dist/core/summarize.d.ts +17 -0
- package/dist/core/summarize.js +78 -0
- package/dist/core/synonyms.d.ts +42 -0
- package/dist/core/synonyms.js +184 -0
- package/dist/core/system-monitor.d.ts +61 -0
- package/dist/core/system-monitor.js +133 -0
- package/dist/core/table-format.d.ts +30 -0
- package/dist/core/table-format.js +146 -0
- package/dist/core/threat-feeds.d.ts +23 -0
- package/dist/core/threat-feeds.js +104 -0
- package/dist/core/timing.d.ts +21 -0
- package/dist/core/timing.js +33 -0
- package/dist/core/transcript-export.d.ts +47 -0
- package/dist/core/transcript-export.js +107 -0
- package/dist/core/user-agents.d.ts +82 -0
- package/dist/core/user-agents.js +239 -0
- package/dist/core/vertical-search.d.ts +54 -0
- package/dist/core/vertical-search.js +158 -0
- package/dist/core/watch-manager.d.ts +175 -0
- package/dist/core/watch-manager.js +416 -0
- package/dist/core/watch.d.ts +101 -0
- package/dist/core/watch.js +389 -0
- package/dist/core/youtube.d.ts +130 -0
- package/dist/core/youtube.js +1175 -0
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +8 -0
- package/dist/ee/domain-extractors.js +8 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors/allrecipes.d.ts +2 -0
- package/dist/ee/extractors/allrecipes.js +120 -0
- package/dist/ee/extractors/amazon.d.ts +2 -0
- package/dist/ee/extractors/amazon.js +78 -0
- package/dist/ee/extractors/arxiv.d.ts +2 -0
- package/dist/ee/extractors/arxiv.js +137 -0
- package/dist/ee/extractors/bestbuy.d.ts +2 -0
- package/dist/ee/extractors/bestbuy.js +78 -0
- package/dist/ee/extractors/carscom.d.ts +2 -0
- package/dist/ee/extractors/carscom.js +121 -0
- package/dist/ee/extractors/coingecko.d.ts +2 -0
- package/dist/ee/extractors/coingecko.js +134 -0
- package/dist/ee/extractors/craigslist.d.ts +2 -0
- package/dist/ee/extractors/craigslist.js +92 -0
- package/dist/ee/extractors/devto.d.ts +2 -0
- package/dist/ee/extractors/devto.js +135 -0
- package/dist/ee/extractors/ebay.d.ts +2 -0
- package/dist/ee/extractors/ebay.js +90 -0
- package/dist/ee/extractors/espn.d.ts +2 -0
- package/dist/ee/extractors/espn.js +260 -0
- package/dist/ee/extractors/etsy.d.ts +2 -0
- package/dist/ee/extractors/etsy.js +52 -0
- package/dist/ee/extractors/facebook.d.ts +2 -0
- package/dist/ee/extractors/facebook.js +46 -0
- package/dist/ee/extractors/github.d.ts +2 -0
- package/dist/ee/extractors/github.js +196 -0
- package/dist/ee/extractors/google-flights.d.ts +2 -0
- package/dist/ee/extractors/google-flights.js +176 -0
- package/dist/ee/extractors/hackernews.d.ts +2 -0
- package/dist/ee/extractors/hackernews.js +147 -0
- package/dist/ee/extractors/imdb.d.ts +2 -0
- package/dist/ee/extractors/imdb.js +172 -0
- package/dist/ee/extractors/index.d.ts +26 -0
- package/dist/ee/extractors/index.js +247 -0
- package/dist/ee/extractors/instagram.d.ts +2 -0
- package/dist/ee/extractors/instagram.js +102 -0
- package/dist/ee/extractors/kalshi.d.ts +2 -0
- package/dist/ee/extractors/kalshi.js +121 -0
- package/dist/ee/extractors/kayak-cars.d.ts +2 -0
- package/dist/ee/extractors/kayak-cars.js +270 -0
- package/dist/ee/extractors/linkedin.d.ts +2 -0
- package/dist/ee/extractors/linkedin.js +113 -0
- package/dist/ee/extractors/medium.d.ts +2 -0
- package/dist/ee/extractors/medium.js +130 -0
- package/dist/ee/extractors/news.d.ts +4 -0
- package/dist/ee/extractors/news.js +173 -0
- package/dist/ee/extractors/npm.d.ts +2 -0
- package/dist/ee/extractors/npm.js +86 -0
- package/dist/ee/extractors/pdf.d.ts +2 -0
- package/dist/ee/extractors/pdf.js +108 -0
- package/dist/ee/extractors/pinterest.d.ts +2 -0
- package/dist/ee/extractors/pinterest.js +34 -0
- package/dist/ee/extractors/polymarket.d.ts +2 -0
- package/dist/ee/extractors/polymarket.js +358 -0
- package/dist/ee/extractors/producthunt.d.ts +2 -0
- package/dist/ee/extractors/producthunt.js +88 -0
- package/dist/ee/extractors/pubmed.d.ts +2 -0
- package/dist/ee/extractors/pubmed.js +162 -0
- package/dist/ee/extractors/pypi.d.ts +2 -0
- package/dist/ee/extractors/pypi.js +80 -0
- package/dist/ee/extractors/reddit.d.ts +2 -0
- package/dist/ee/extractors/reddit.js +438 -0
- package/dist/ee/extractors/redfin.d.ts +2 -0
- package/dist/ee/extractors/redfin.js +156 -0
- package/dist/ee/extractors/semanticscholar.d.ts +2 -0
- package/dist/ee/extractors/semanticscholar.js +131 -0
- package/dist/ee/extractors/shared.d.ts +12 -0
- package/dist/ee/extractors/shared.js +76 -0
- package/dist/ee/extractors/soundcloud.d.ts +2 -0
- package/dist/ee/extractors/soundcloud.js +34 -0
- package/dist/ee/extractors/sportsbetting.d.ts +2 -0
- package/dist/ee/extractors/sportsbetting.js +37 -0
- package/dist/ee/extractors/spotify.d.ts +2 -0
- package/dist/ee/extractors/spotify.js +34 -0
- package/dist/ee/extractors/stackoverflow.d.ts +2 -0
- package/dist/ee/extractors/stackoverflow.js +61 -0
- package/dist/ee/extractors/substack.d.ts +2 -0
- package/dist/ee/extractors/substack.js +115 -0
- package/dist/ee/extractors/substackroot.d.ts +2 -0
- package/dist/ee/extractors/substackroot.js +46 -0
- package/dist/ee/extractors/tiktok.d.ts +2 -0
- package/dist/ee/extractors/tiktok.js +29 -0
- package/dist/ee/extractors/tradingview.d.ts +2 -0
- package/dist/ee/extractors/tradingview.js +182 -0
- package/dist/ee/extractors/twitch.d.ts +2 -0
- package/dist/ee/extractors/twitch.js +36 -0
- package/dist/ee/extractors/twitter.d.ts +2 -0
- package/dist/ee/extractors/twitter.js +327 -0
- package/dist/ee/extractors/types.d.ts +14 -0
- package/dist/ee/extractors/types.js +1 -0
- package/dist/ee/extractors/walmart.d.ts +2 -0
- package/dist/ee/extractors/walmart.js +50 -0
- package/dist/ee/extractors/weather.d.ts +2 -0
- package/dist/ee/extractors/weather.js +133 -0
- package/dist/ee/extractors/wikipedia.d.ts +4 -0
- package/dist/ee/extractors/wikipedia.js +235 -0
- package/dist/ee/extractors/yelp.d.ts +2 -0
- package/dist/ee/extractors/yelp.js +216 -0
- package/dist/ee/extractors/youtube.d.ts +2 -0
- package/dist/ee/extractors/youtube.js +189 -0
- package/dist/ee/extractors/zillow.d.ts +54 -0
- package/dist/ee/extractors/zillow.js +247 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/index.d.ts +143 -0
- package/dist/index.js +291 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/langchain.d.ts +64 -0
- package/dist/integrations/langchain.js +115 -0
- package/dist/integrations/llamaindex.d.ts +50 -0
- package/dist/integrations/llamaindex.js +91 -0
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +395 -0
- package/dist/mcp/handlers/extract.d.ts +7 -0
- package/dist/mcp/handlers/extract.js +135 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +63 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +40 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/observe.d.ts +8 -0
- package/dist/mcp/handlers/observe.js +37 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +78 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +7 -0
- package/dist/mcp/server.js +108 -0
- package/dist/mcp/smart-router.d.ts +23 -0
- package/dist/mcp/smart-router.js +178 -0
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +632 -0
- package/dist/server/auth-store.d.ts +28 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/bull-queues.d.ts +60 -0
- package/dist/server/bull-queues.js +90 -0
- package/dist/server/email-service.d.ts +55 -0
- package/dist/server/email-service.js +291 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/audit-log.d.ts +14 -0
- package/dist/server/middleware/audit-log.js +73 -0
- package/dist/server/middleware/auth.d.ts +35 -0
- package/dist/server/middleware/auth.js +225 -0
- package/dist/server/middleware/rate-limit.d.ts +50 -0
- package/dist/server/middleware/rate-limit.js +270 -0
- package/dist/server/middleware/scope-guard.d.ts +25 -0
- package/dist/server/middleware/scope-guard.js +45 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +201 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +146 -0
- package/dist/server/pg-auth-store.js +576 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +79 -0
- package/dist/server/routes/admin-active.d.ts +7 -0
- package/dist/server/routes/admin-active.js +120 -0
- package/dist/server/routes/admin-stats.d.ts +7 -0
- package/dist/server/routes/admin-stats.js +176 -0
- package/dist/server/routes/agent.d.ts +24 -0
- package/dist/server/routes/agent.js +480 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +295 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cache-warm.d.ts +25 -0
- package/dist/server/routes/cache-warm.js +212 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/crawl.d.ts +13 -0
- package/dist/server/routes/crawl.js +287 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/deep-research.d.ts +11 -0
- package/dist/server/routes/deep-research.js +232 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +14 -0
- package/dist/server/routes/extract.js +325 -0
- package/dist/server/routes/feed.d.ts +15 -0
- package/dist/server/routes/feed.js +311 -0
- package/dist/server/routes/fetch-queue.d.ts +13 -0
- package/dist/server/routes/fetch-queue.js +357 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +1274 -0
- package/dist/server/routes/go.d.ts +14 -0
- package/dist/server/routes/go.js +81 -0
- package/dist/server/routes/health.d.ts +11 -0
- package/dist/server/routes/health.js +141 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +574 -0
- package/dist/server/routes/map.d.ts +11 -0
- package/dist/server/routes/map.js +116 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +197 -0
- package/dist/server/routes/metrics.d.ts +37 -0
- package/dist/server/routes/metrics.js +149 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/reader.d.ts +18 -0
- package/dist/server/routes/reader.js +192 -0
- package/dist/server/routes/research.d.ts +14 -0
- package/dist/server/routes/research.js +482 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +820 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +874 -0
- package/dist/server/routes/session.d.ts +17 -0
- package/dist/server/routes/session.js +548 -0
- package/dist/server/routes/share.d.ts +18 -0
- package/dist/server/routes/share.js +462 -0
- package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/cars.js +102 -0
- package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/flights.js +72 -0
- package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
- package/dist/server/routes/smart-search/handlers/general.js +717 -0
- package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
- package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/products.js +1309 -0
- package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/rental.js +154 -0
- package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
- package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
- package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
- package/dist/server/routes/smart-search/index.d.ts +19 -0
- package/dist/server/routes/smart-search/index.js +546 -0
- package/dist/server/routes/smart-search/intent.d.ts +3 -0
- package/dist/server/routes/smart-search/intent.js +264 -0
- package/dist/server/routes/smart-search/llm.d.ts +16 -0
- package/dist/server/routes/smart-search/llm.js +70 -0
- package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
- package/dist/server/routes/smart-search/sources/reddit.js +34 -0
- package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
- package/dist/server/routes/smart-search/sources/yelp.js +171 -0
- package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
- package/dist/server/routes/smart-search/sources/youtube.js +9 -0
- package/dist/server/routes/smart-search/types.d.ts +81 -0
- package/dist/server/routes/smart-search/types.js +1 -0
- package/dist/server/routes/smart-search/utils.d.ts +20 -0
- package/dist/server/routes/smart-search/utils.js +146 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +296 -0
- package/dist/server/routes/transcript-export.d.ts +10 -0
- package/dist/server/routes/transcript-export.js +178 -0
- package/dist/server/routes/usage.d.ts +9 -0
- package/dist/server/routes/usage.js +279 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1867 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +14 -0
- package/dist/server/sentry.js +104 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/dist/types.d.ts +552 -0
- package/dist/types.js +39 -0
- package/llms.txt +105 -0
- package/package.json +189 -0
|
@@ -0,0 +1,1021 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search commands: search, sites, batch, crawl, map
|
|
3
|
+
*/
|
|
4
|
+
import ora from 'ora';
|
|
5
|
+
import { readFileSync } from 'fs';
|
|
6
|
+
import { peel, peelBatch, cleanup } from '../../index.js';
|
|
7
|
+
import { checkUsage, showUsageFooter, loadConfig } from '../../cli-auth.js';
|
|
8
|
+
import { writeStdout, formatListingsCsv } from '../utils.js';
|
|
9
|
+
/**
 * Parse a date range string like "Mar29-Apr4" into an array of date labels.
 *
 * The separator may be any run of "-", "–", "t" or "o" (so "Mar29 to Apr4"
 * also works), and month abbreviations are matched case-insensitively.
 * Labels carry no year; the current year is used only to enumerate days.
 *
 * A range whose end month precedes its start month (e.g. "Dec29-Jan4") is
 * treated as crossing into the next year instead of yielding nothing.
 * A range that ends earlier within the same month (e.g. "Mar29-Mar4") is
 * considered invalid and yields an empty array.
 *
 * @param {string} range - Range text containing "<Mon><day><sep><Mon><day>".
 * @returns {string[]} Inclusive list such as ["Mar 29", "Mar 30", ..., "Apr 4"],
 *   or [] when the text does not parse or names an unknown month.
 */
function parseDateRange(range) {
    const match = range.match(/(\w{3})\s*(\d{1,2})\s*[-–to]+\s*(\w{3})\s*(\d{1,2})/i);
    if (!match) {
        return [];
    }
    const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    // The captures are always exactly three characters, so a case-insensitive
    // comparison against the abbreviations is all that is needed.
    const monthIndexOf = (token) => months.findIndex((m) => m.toLowerCase() === token.toLowerCase());
    const startMonthIdx = monthIndexOf(match[1]);
    const endMonthIdx = monthIndexOf(match[3]);
    if (startMonthIdx === -1 || endMonthIdx === -1) {
        return [];
    }
    const startDay = Number.parseInt(match[2], 10);
    const endDay = Number.parseInt(match[4], 10);
    const year = new Date().getFullYear();
    // An end month earlier in the calendar than the start month means the
    // range wraps over New Year, so the end belongs to the following year.
    const endYear = endMonthIdx < startMonthIdx ? year + 1 : year;
    // Work in UTC so a daylight-saving shift at local midnight can never skip
    // or drop a day. Out-of-range days (e.g. "Feb 30") are normalized by Date.
    const endTime = Date.UTC(endYear, endMonthIdx, endDay);
    const dates = [];
    const cursor = new Date(Date.UTC(year, startMonthIdx, startDay));
    while (cursor.getTime() <= endTime) {
        dates.push(`${months[cursor.getUTCMonth()]} ${cursor.getUTCDate()}`);
        cursor.setUTCDate(cursor.getUTCDate() + 1);
    }
    return dates;
}
|
|
34
|
+
export function registerSearchCommands(program) {
|
|
35
|
+
// ── search command ────────────────────────────────────────────────────────
|
|
36
|
+
program
|
|
37
|
+
.command('search <query>')
|
|
38
|
+
.description('Search the web (DuckDuckGo by default, or use --site for site-specific search)')
|
|
39
|
+
.option('-n, --count <n>', 'Number of results (1-10)', '5')
|
|
40
|
+
.option('--top <n>', 'Limit results (alias for --count)')
|
|
41
|
+
.option('--provider <provider>', 'Search provider: duckduckgo (default), brave, google, baidu, yandex, naver, yahoo_japan')
|
|
42
|
+
.option('--search-api-key <key>', 'API key for the search provider (or env WEBPEEL_BRAVE_API_KEY)')
|
|
43
|
+
.option('--site <site>', 'Search a specific site (e.g. ebay, amazon, github). Run "webpeel sites" for full list.')
|
|
44
|
+
.option('--json', 'Output as JSON')
|
|
45
|
+
.option('--urls-only', 'Output only URLs, one per line (pipe-friendly)')
|
|
46
|
+
.option('--table', 'Output site-search results as a formatted table (requires --site)')
|
|
47
|
+
.option('--csv', 'Output site-search results as CSV (requires --site)')
|
|
48
|
+
.option('--budget <n>', 'Token budget for site-search result content', parseInt)
|
|
49
|
+
.option('-s, --silent', 'Silent mode')
|
|
50
|
+
.option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
|
|
51
|
+
.option('--fetch', 'Also fetch and include content from each result URL')
|
|
52
|
+
.option('--local', 'Local business search via Google Places / Yelp (requires API key)')
|
|
53
|
+
.option('--location <location>', 'Location for local search (e.g. "Shibuya, Tokyo", "35.6595,139.7004")')
|
|
54
|
+
.option('--language <lang>', 'Language code for local search results (e.g. "ja", "fr")')
|
|
55
|
+
.option('--country <code>', 'ISO 3166-1 alpha-2 country code for local search (e.g. "JP", "FR")')
|
|
56
|
+
.option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
|
|
57
|
+
.action(async (query, options) => {
|
|
58
|
+
// --agent sets sensible defaults for AI agents; explicit flags override
|
|
59
|
+
if (options.agent) {
|
|
60
|
+
if (!options.json)
|
|
61
|
+
options.json = true;
|
|
62
|
+
if (!options.silent)
|
|
63
|
+
options.silent = true;
|
|
64
|
+
if (options.budget === undefined)
|
|
65
|
+
options.budget = 4000;
|
|
66
|
+
}
|
|
67
|
+
const isJson = options.json;
|
|
68
|
+
const isSilent = options.silent;
|
|
69
|
+
// --top overrides --count when both are provided
|
|
70
|
+
const count = parseInt(options.top ?? options.count) || 5;
|
|
71
|
+
// Check usage quota
|
|
72
|
+
const usageCheck = await checkUsage();
|
|
73
|
+
if (!usageCheck.allowed) {
|
|
74
|
+
console.error(usageCheck.message);
|
|
75
|
+
process.exit(1);
|
|
76
|
+
}
|
|
77
|
+
// ── --site: site-specific structured search ───────────────────────────
|
|
78
|
+
if (options.site) {
|
|
79
|
+
const spinner = isSilent ? null : ora(`Searching ${options.site}...`).start();
|
|
80
|
+
try {
|
|
81
|
+
const { buildSiteSearchUrl } = await import('../../core/site-search.js');
|
|
82
|
+
const siteResult = buildSiteSearchUrl(options.site, query);
|
|
83
|
+
// Fetch the raw HTML (needed for listing extraction)
|
|
84
|
+
const htmlResult = await peel(siteResult.url, {
|
|
85
|
+
format: 'html',
|
|
86
|
+
timeout: 30000,
|
|
87
|
+
proxy: options.proxy,
|
|
88
|
+
});
|
|
89
|
+
if (spinner) {
|
|
90
|
+
spinner.succeed(`Fetched ${siteResult.site} in ${htmlResult.elapsed}ms`);
|
|
91
|
+
}
|
|
92
|
+
// Extract listings from the HTML
|
|
93
|
+
const { extractListings } = await import('../../core/extract-listings.js');
|
|
94
|
+
let listings = extractListings(htmlResult.content, siteResult.url);
|
|
95
|
+
// Apply budget if requested
|
|
96
|
+
if (options.budget && options.budget > 0 && listings.length > 0) {
|
|
97
|
+
const { budgetListings } = await import('../../core/budget.js');
|
|
98
|
+
const { maxItems } = budgetListings(listings.length, options.budget);
|
|
99
|
+
listings = listings.slice(0, maxItems);
|
|
100
|
+
}
|
|
101
|
+
// Show usage footer
|
|
102
|
+
if (usageCheck.usageInfo && !isSilent) {
|
|
103
|
+
showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
|
|
104
|
+
}
|
|
105
|
+
// Output
|
|
106
|
+
if (options.csv) {
|
|
107
|
+
const rows = listings.map(item => {
|
|
108
|
+
const row = {};
|
|
109
|
+
for (const [k, v] of Object.entries(item)) {
|
|
110
|
+
if (v !== undefined)
|
|
111
|
+
row[k] = v;
|
|
112
|
+
}
|
|
113
|
+
return row;
|
|
114
|
+
});
|
|
115
|
+
await writeStdout(formatListingsCsv(rows));
|
|
116
|
+
}
|
|
117
|
+
else if (options.table) {
|
|
118
|
+
const { formatTable } = await import('../../core/table-format.js');
|
|
119
|
+
const rows = listings.map(item => {
|
|
120
|
+
const row = {};
|
|
121
|
+
for (const [k, v] of Object.entries(item)) {
|
|
122
|
+
if (v !== undefined)
|
|
123
|
+
row[k] = v;
|
|
124
|
+
}
|
|
125
|
+
return row;
|
|
126
|
+
});
|
|
127
|
+
await writeStdout(formatTable(rows) + '\n');
|
|
128
|
+
}
|
|
129
|
+
else if (isJson) {
|
|
130
|
+
const envelope = {
|
|
131
|
+
site: siteResult.site,
|
|
132
|
+
query: siteResult.query,
|
|
133
|
+
url: siteResult.url,
|
|
134
|
+
count: listings.length,
|
|
135
|
+
items: listings,
|
|
136
|
+
elapsed: htmlResult.elapsed,
|
|
137
|
+
};
|
|
138
|
+
await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
|
|
139
|
+
}
|
|
140
|
+
else {
|
|
141
|
+
if (listings.length === 0) {
|
|
142
|
+
await writeStdout('No listings found.\n');
|
|
143
|
+
}
|
|
144
|
+
else {
|
|
145
|
+
await writeStdout(`Found ${listings.length} listings on ${siteResult.site}:\n\n`);
|
|
146
|
+
for (const [i, item] of listings.entries()) {
|
|
147
|
+
const pricePart = item.price ? ` — ${item.price}` : '';
|
|
148
|
+
process.stdout.write(`${i + 1}. ${item.title}${pricePart}\n`);
|
|
149
|
+
if (item.link)
|
|
150
|
+
process.stdout.write(` ${item.link}\n`);
|
|
151
|
+
process.stdout.write('\n');
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
await cleanup();
|
|
156
|
+
process.exit(0);
|
|
157
|
+
}
|
|
158
|
+
catch (error) {
|
|
159
|
+
if (spinner)
|
|
160
|
+
spinner.fail('Site search failed');
|
|
161
|
+
if (error instanceof Error) {
|
|
162
|
+
console.error(`\nError: ${error.message}`);
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
console.error('\nError: Unknown error occurred');
|
|
166
|
+
}
|
|
167
|
+
await cleanup();
|
|
168
|
+
process.exit(1);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
// ── --local: local business search via Google Places / Yelp ─────────
|
|
172
|
+
if (options.local) {
|
|
173
|
+
const spinner = isSilent ? null : ora('Searching local businesses...').start();
|
|
174
|
+
try {
|
|
175
|
+
const { localSearch } = await import('../../core/local-search.js');
|
|
176
|
+
const localResults = await localSearch({
|
|
177
|
+
query,
|
|
178
|
+
location: options.location,
|
|
179
|
+
language: options.language,
|
|
180
|
+
country: options.country,
|
|
181
|
+
limit: count,
|
|
182
|
+
});
|
|
183
|
+
if (spinner)
|
|
184
|
+
spinner.succeed(`Found ${localResults.results.length} results (${localResults.source})`);
|
|
185
|
+
if (isJson) {
|
|
186
|
+
await writeStdout(JSON.stringify(localResults, null, 2) + '\n');
|
|
187
|
+
}
|
|
188
|
+
else {
|
|
189
|
+
if (localResults.results.length === 0) {
|
|
190
|
+
await writeStdout('No local results found.\n');
|
|
191
|
+
}
|
|
192
|
+
else {
|
|
193
|
+
await writeStdout(`\n📍 Local results for "${query}"${localResults.location ? ` near ${localResults.location}` : ''}\n`);
|
|
194
|
+
await writeStdout(`Source: ${localResults.source}\n\n`);
|
|
195
|
+
for (const [i, r] of localResults.results.entries()) {
|
|
196
|
+
const rating = r.rating ? `⭐${r.rating}` : '';
|
|
197
|
+
const reviews = r.reviewCount ? `(${r.reviewCount.toLocaleString()})` : '';
|
|
198
|
+
const price = r.priceLevel !== undefined ? ` · ${'$'.repeat(Math.max(1, r.priceLevel))}` : '';
|
|
199
|
+
const open = r.isOpen === true ? ' · 🟢 Open' : r.isOpen === false ? ' · 🔴 Closed' : '';
|
|
200
|
+
await writeStdout(`${i + 1}. ${r.name} ${rating} ${reviews}${price}${open}\n`);
|
|
201
|
+
if (r.address)
|
|
202
|
+
await writeStdout(` ${r.address}\n`);
|
|
203
|
+
if (r.googleMapsUrl)
|
|
204
|
+
await writeStdout(` ${r.googleMapsUrl}\n`);
|
|
205
|
+
await writeStdout('\n');
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
process.exit(0);
|
|
210
|
+
}
|
|
211
|
+
catch (err) {
|
|
212
|
+
if (spinner)
|
|
213
|
+
spinner.fail('Local search failed');
|
|
214
|
+
console.error(`Error: ${err instanceof Error ? err.message : 'Unknown error'}`);
|
|
215
|
+
console.error('Hint: Set GOOGLE_PLACES_API_KEY or YELP_API_KEY environment variable for local search.');
|
|
216
|
+
process.exit(1);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
const spinner = isSilent ? null : ora('Searching...').start();
|
|
220
|
+
try {
|
|
221
|
+
// Route search through the WebPeel API when a key is configured
|
|
222
|
+
const searchCfg = loadConfig();
|
|
223
|
+
const searchApiKey = searchCfg.apiKey || process.env.WEBPEEL_API_KEY;
|
|
224
|
+
const searchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
|
|
225
|
+
if (!searchApiKey) {
|
|
226
|
+
if (spinner)
|
|
227
|
+
spinner.fail('Authentication required');
|
|
228
|
+
console.error('No API key configured. Run: webpeel auth <your-key>');
|
|
229
|
+
console.error('Get a free key at: https://app.webpeel.dev/keys');
|
|
230
|
+
process.exit(2);
|
|
231
|
+
}
|
|
232
|
+
const searchParams = new URLSearchParams({ q: query });
|
|
233
|
+
searchParams.set('limit', String(Math.min(Math.max(count, 1), 10)));
|
|
234
|
+
if (options.budget)
|
|
235
|
+
searchParams.set('budget', String(options.budget));
|
|
236
|
+
if (options.provider)
|
|
237
|
+
searchParams.set('provider', options.provider);
|
|
238
|
+
if (options.searchApiKey)
|
|
239
|
+
searchParams.set('searchApiKey', options.searchApiKey);
|
|
240
|
+
const searchRes = await fetch(`${searchApiUrl}/v1/search?${searchParams}`, {
|
|
241
|
+
headers: { Authorization: `Bearer ${searchApiKey}` },
|
|
242
|
+
signal: AbortSignal.timeout(30000),
|
|
243
|
+
});
|
|
244
|
+
if (searchRes.status === 401) {
|
|
245
|
+
if (spinner)
|
|
246
|
+
spinner.fail('Authentication failed');
|
|
247
|
+
console.error('API key invalid or expired. Run: webpeel auth <new-key>');
|
|
248
|
+
process.exit(1);
|
|
249
|
+
}
|
|
250
|
+
if (searchRes.status === 429) {
|
|
251
|
+
if (spinner)
|
|
252
|
+
spinner.fail('Rate limited');
|
|
253
|
+
console.error('Rate limit exceeded. Check your plan at https://app.webpeel.dev/billing');
|
|
254
|
+
process.exit(1);
|
|
255
|
+
}
|
|
256
|
+
if (!searchRes.ok) {
|
|
257
|
+
const body = await searchRes.text().catch(() => '');
|
|
258
|
+
throw new Error(`Search API error ${searchRes.status}: ${body.slice(0, 200)}`);
|
|
259
|
+
}
|
|
260
|
+
const searchData = await searchRes.json();
|
|
261
|
+
// API returns { success: true, data: { web: [...] } } or { results: [...] }
|
|
262
|
+
let results = searchData.data?.web || searchData.data?.results || searchData.results || [];
|
|
263
|
+
// Client-side ad filtering: remove DuckDuckGo ads that slip through the server
|
|
264
|
+
results = results.filter(r => {
|
|
265
|
+
// Filter DDG-internal URLs
|
|
266
|
+
try {
|
|
267
|
+
const parsed = new URL(r.url);
|
|
268
|
+
if (parsed.hostname === 'duckduckgo.com')
|
|
269
|
+
return false;
|
|
270
|
+
if (parsed.searchParams.has('ad_domain') ||
|
|
271
|
+
parsed.searchParams.has('ad_provider') ||
|
|
272
|
+
parsed.searchParams.has('ad_type'))
|
|
273
|
+
return false;
|
|
274
|
+
}
|
|
275
|
+
catch {
|
|
276
|
+
return false;
|
|
277
|
+
}
|
|
278
|
+
// Filter ad snippets
|
|
279
|
+
if (r.snippet && (r.snippet.includes('Ad ·') ||
|
|
280
|
+
r.snippet.includes('Ad Viewing ads is privacy protected by DuckDuckGo') ||
|
|
281
|
+
r.snippet.toLowerCase().startsWith('ad ·')))
|
|
282
|
+
return false;
|
|
283
|
+
return true;
|
|
284
|
+
});
|
|
285
|
+
if (spinner) {
|
|
286
|
+
spinner.succeed(`Found ${results.length} results`);
|
|
287
|
+
}
|
|
288
|
+
// --fetch: fetch content from each result
|
|
289
|
+
if (options.fetch && results.length > 0) {
|
|
290
|
+
const fetchCfg = loadConfig();
|
|
291
|
+
const fetchApiKey = fetchCfg.apiKey || process.env.WEBPEEL_API_KEY;
|
|
292
|
+
const fetchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
|
|
293
|
+
if (fetchApiKey) {
|
|
294
|
+
const fetchSpinner = isSilent ? null : ora(`Fetching content from ${results.length} results...`).start();
|
|
295
|
+
await Promise.all(results.map(async (result) => {
|
|
296
|
+
try {
|
|
297
|
+
const fetchParams = new URLSearchParams({ url: result.url });
|
|
298
|
+
if (options.budget)
|
|
299
|
+
fetchParams.set('budget', String(options.budget || 2000));
|
|
300
|
+
const fetchRes = await fetch(`${fetchApiUrl}/v1/fetch?${fetchParams}`, {
|
|
301
|
+
headers: { Authorization: `Bearer ${fetchApiKey}` },
|
|
302
|
+
signal: AbortSignal.timeout(20000),
|
|
303
|
+
});
|
|
304
|
+
if (fetchRes.ok) {
|
|
305
|
+
const fetchData = await fetchRes.json();
|
|
306
|
+
result.content = fetchData.content || fetchData.data?.content || '';
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
catch { /* skip on error */ }
|
|
310
|
+
}));
|
|
311
|
+
if (fetchSpinner)
|
|
312
|
+
fetchSpinner.succeed('Content fetched');
|
|
313
|
+
}
|
|
314
|
+
else if (!isSilent) {
|
|
315
|
+
console.error('Warning: --fetch requires API key (run: webpeel auth <key>)');
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
// Show usage footer for free/anonymous users
|
|
319
|
+
if (usageCheck.usageInfo && !isSilent) {
|
|
320
|
+
showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
|
|
321
|
+
}
|
|
322
|
+
if (options.urlsOnly) {
|
|
323
|
+
// Pipe-friendly: one URL per line
|
|
324
|
+
for (const result of results) {
|
|
325
|
+
await writeStdout(result.url + '\n');
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
else if (isJson) {
|
|
329
|
+
const jsonStr = JSON.stringify({ query, results, count: results.length }, null, 2);
|
|
330
|
+
await writeStdout(jsonStr + '\n');
|
|
331
|
+
}
|
|
332
|
+
else {
|
|
333
|
+
// Human-readable numbered results
|
|
334
|
+
if (results.length === 0) {
|
|
335
|
+
await writeStdout('No results found.\n');
|
|
336
|
+
}
|
|
337
|
+
else {
|
|
338
|
+
await writeStdout(`\n`);
|
|
339
|
+
for (const [i, result] of results.entries()) {
|
|
340
|
+
await writeStdout(`${i + 1}. ${result.title}\n`);
|
|
341
|
+
await writeStdout(` ${result.url}\n`);
|
|
342
|
+
if (result.snippet) {
|
|
343
|
+
await writeStdout(` ${result.snippet}\n`);
|
|
344
|
+
}
|
|
345
|
+
if (result.content) {
|
|
346
|
+
const preview = result.content.slice(0, 500);
|
|
347
|
+
await writeStdout(`\n --- Content ---\n${preview}${result.content.length > 500 ? '\n [...]' : ''}\n`);
|
|
348
|
+
}
|
|
349
|
+
await writeStdout('\n');
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
process.exit(0);
|
|
354
|
+
}
|
|
355
|
+
catch (error) {
|
|
356
|
+
if (spinner) {
|
|
357
|
+
spinner.fail('Search failed');
|
|
358
|
+
}
|
|
359
|
+
if (error instanceof Error) {
|
|
360
|
+
console.error(`\nError: ${error.message}`);
|
|
361
|
+
const msg = error.message.toLowerCase();
|
|
362
|
+
if (msg.includes('brave') && msg.includes('api key')) {
|
|
363
|
+
console.error('\n💡 Hint: Set your Brave API key: webpeel config set braveApiKey YOUR_KEY');
|
|
364
|
+
console.error(' Or use free DuckDuckGo search (default, no key needed).');
|
|
365
|
+
}
|
|
366
|
+
else if (msg.includes('timeout') || msg.includes('timed out')) {
|
|
367
|
+
console.error('\n💡 Hint: Search timed out. Try a more specific query or try again.');
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
else {
|
|
371
|
+
console.error('\nError: Unknown error occurred');
|
|
372
|
+
}
|
|
373
|
+
process.exit(1);
|
|
374
|
+
}
|
|
375
|
+
});
|
|
376
|
+
// ── sites command — list all supported site templates ────────────────────
|
|
377
|
+
// `sites` command: prints the site templates returned by listSites(),
// optionally filtered by --category. With --json the (filtered) list is
// written as JSON; otherwise sites are grouped by category and printed as
// "<id> <name>" rows under a capitalised category heading.
program
    .command('sites')
    .description('List all sites supported by "webpeel search --site <site>"')
    .option('--json', 'Output as JSON')
    .option('--category <cat>', 'Filter by category (shopping, social, tech, jobs, general, real-estate, food)')
    .action(async (options) => {
        const { listSites } = await import('../../core/site-search.js');
        let sites = listSites();
        if (options.category) {
            sites = sites.filter(s => s.category === options.category);
        }
        if (options.json) {
            await writeStdout(JSON.stringify(sites, null, 2) + '\n');
            process.exit(0);
        }
        // Group by category for pretty output
        const byCategory = new Map();
        for (const site of sites) {
            if (!byCategory.has(site.category))
                byCategory.set(site.category, []);
            byCategory.get(site.category).push(site);
        }
        // Preferred display order. Categories that are not listed here are
        // appended afterwards (in first-seen order) so that their sites are
        // not silently missing from the human-readable output.
        const categoryOrder = ['shopping', 'general', 'social', 'tech', 'jobs', 'real-estate', 'food'];
        const sortedCategories = [
            ...categoryOrder.filter(c => byCategory.has(c)),
            ...[...byCategory.keys()].filter(c => !categoryOrder.includes(c)),
        ];
        console.log('\nWebPeel Site-Aware Search — supported sites\n');
        console.log('Usage: webpeel search --site <id> "<query>"\n');
        for (const cat of sortedCategories) {
            const catSites = byCategory.get(cat);
            // A site without a category would otherwise crash on charAt().
            const catName = cat == null ? 'other' : String(cat);
            const label = catName.charAt(0).toUpperCase() + catName.slice(1);
            console.log(` ${label}:`);
            for (const s of catSites) {
                console.log(` ${s.id.padEnd(16)} ${s.name}`);
            }
            console.log('');
        }
        process.exit(0);
    });
|
|
414
|
+
// ── batch command ─────────────────────────────────────────────────────────
|
|
415
|
+
// `batch` command: reads URLs (one per line; blank lines and lines starting
// with '#' are ignored) from the [file] argument or from piped stdin, fetches
// them with peelBatch(), and emits the results as a JSON array (--json), as
// one file per URL in a directory (--output), or as short previews on stdout.
// Exits 0 on success and 1 on any failure, calling cleanup() in both cases.
program
    .command('batch [file]')
    .description('Fetch multiple URLs from file or stdin pipe')
    .option('-c, --concurrency <n>', 'Max concurrent fetches (default: 3)', '3')
    .option('-o, --output <dir>', 'Output directory (one file per URL)')
    .option('--json', 'Output as JSON array')
    .option('-s, --silent', 'Silent mode')
    .option('-r, --render', 'Use headless browser')
    .option('--selector <css>', 'CSS selector to extract')
    .action(async (file, options) => {
        const isJson = options.json;
        const isSilent = options.silent;
        const shouldRender = options.render;
        const selector = options.selector;
        // Check usage quota
        const usageCheck = await checkUsage();
        if (!usageCheck.allowed) {
            console.error(usageCheck.message);
            process.exit(1);
        }
        const spinner = isSilent ? null : ora('Loading URLs...').start();
        // Shared by the file and stdin paths: one URL per line, trimmed,
        // skipping blank lines and '#' comment lines.
        const parseUrlList = (text) => text.split('\n')
            .map(line => line.trim())
            .filter(line => line && !line.startsWith('#'));
        try {
            // Read URLs from file or stdin
            let urls;
            if (file) {
                // Read from file
                let content;
                try {
                    content = readFileSync(file, 'utf-8');
                }
                catch (error) {
                    // Keep the underlying fs error reachable for debugging.
                    throw new Error(`Failed to read file: ${file}`, { cause: error });
                }
                urls = parseUrlList(content);
            }
            else if (!process.stdin.isTTY) {
                // Read from stdin pipe
                const chunks = [];
                for await (const chunk of process.stdin) {
                    chunks.push(chunk);
                }
                urls = parseUrlList(Buffer.concat(chunks).toString('utf-8'));
            }
            else {
                throw new Error('Provide a file path or pipe URLs via stdin.\n Example: cat urls.txt | webpeel batch');
            }
            if (urls.length === 0) {
                throw new Error('No URLs found in file');
            }
            if (spinner) {
                spinner.text = `Fetching ${urls.length} URLs (concurrency: ${options.concurrency})...`;
            }
            // Batch fetch
            const results = await peelBatch(urls, {
                concurrency: Number.parseInt(options.concurrency, 10) || 3,
                render: shouldRender,
                selector: selector,
            });
            if (spinner) {
                const successCount = results.filter(r => 'content' in r).length;
                spinner.succeed(`Completed: ${successCount}/${urls.length} successful`);
            }
            // Show usage footer for free/anonymous users
            if (usageCheck.usageInfo && !isSilent) {
                showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
            }
            // Output results
            if (isJson) {
                const jsonStr = JSON.stringify(results, null, 2);
                // Wait for the write callback so the output is flushed
                // before process.exit() below.
                await new Promise((resolve, reject) => {
                    process.stdout.write(jsonStr + '\n', (err) => {
                        if (err)
                            reject(err);
                        else
                            resolve();
                    });
                });
            }
            else if (options.output) {
                const { writeFileSync, mkdirSync } = await import('fs');
                const { join } = await import('path');
                // Create output directory
                mkdirSync(options.output, { recursive: true });
                results.forEach((result, i) => {
                    // A malformed input line must not abort the remaining
                    // writes: fall back to the raw line for the file name.
                    let host;
                    try {
                        host = new URL(urls[i]).hostname;
                    }
                    catch {
                        host = String(urls[i]).slice(0, 100);
                    }
                    const filename = `${i + 1}_${host.replace(/[^a-z0-9]/gi, '_')}.md`;
                    const filepath = join(options.output, filename);
                    if ('content' in result) {
                        writeFileSync(filepath, result.content);
                    }
                    else {
                        writeFileSync(filepath, `Error: ${result.error}`);
                    }
                });
                if (!isSilent) {
                    console.log(`\nResults saved to: ${options.output}`);
                }
            }
            else {
                // Print results to stdout
                results.forEach((result, i) => {
                    console.log(`\n=== ${urls[i]} ===\n`);
                    if ('content' in result) {
                        // Only mark the preview as truncated when it is.
                        const text = result.content;
                        console.log(text.length > 500 ? `${text.slice(0, 500)}...` : text);
                    }
                    else {
                        console.log(`Error: ${result.error}`);
                    }
                });
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Batch fetch failed');
            }
            if (error instanceof Error) {
                console.error(`\nError: ${error.message}`);
            }
            else {
                console.error('\nError: Unknown error occurred');
            }
            await cleanup();
            process.exit(1);
        }
    });
|
|
546
|
+
// ── crawl command ─────────────────────────────────────────────────────────
|
|
547
|
+
// `crawl` command: after the usage-quota check, runs crawl() from
// core/crawler.js on <url> with the parsed CLI options and prints either a
// JSON document ({ pages, totalPages, totalTokens }) or a per-page text
// summary with a 500-character markdown preview. Calls cleanup() and exits
// 0 on success, 1 on failure.
program
    .command('crawl <url>')
    .description('Crawl a website starting from a URL')
    .option('--max-pages <number>', 'Maximum number of pages to crawl (default: 10, max: 100)', (raw) => parseInt(raw, 10), 10)
    .option('--max-depth <number>', 'Maximum depth to crawl (default: 2, max: 5)', (raw) => parseInt(raw, 10), 2)
    .option('--allowed-domains <domains...>', 'Only crawl these domains (default: same as starting URL)')
    .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
    .option('--ignore-robots', 'Ignore robots.txt (default: respect robots.txt)')
    .option('--rate-limit <ms>', 'Rate limit between requests in ms (default: 500)', (raw) => parseInt(raw, 10), 500)
    .option('-r, --render', 'Use headless browser for all pages')
    .option('--stealth', 'Use stealth mode for all pages')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .option('--resume', 'Resume an interrupted crawl from its last checkpoint')
    .action(async (url, options) => {
        // Refuse to run when the usage quota check says no.
        const quota = await checkUsage();
        if (!quota.allowed) {
            console.error(quota.message);
            process.exit(1);
        }
        const { crawl } = await import('../../core/crawler.js');
        const progress = options.silent ? null : ora('Crawling...').start();
        try {
            const crawlSettings = {
                maxPages: options.maxPages,
                maxDepth: options.maxDepth,
                allowedDomains: options.allowedDomains,
                excludePatterns: options.exclude,
                respectRobotsTxt: !options.ignoreRobots,
                rateLimitMs: options.rateLimit,
                render: options.render || false,
                stealth: options.stealth || false,
                resume: options.resume || false,
            };
            const crawled = await crawl(url, crawlSettings);
            progress?.succeed(`Crawled ${crawled.length} pages`);
            // Usage footer is only shown when usage info is available and
            // the command is not silent.
            if (quota.usageInfo && !options.silent) {
                showUsageFooter(quota.usageInfo, quota.isAnonymous || false, options.stealth || false);
            }
            if (options.json) {
                let totalTokens = 0;
                for (const page of crawled) {
                    totalTokens += page.tokens ?? 0;
                }
                const pages = [];
                for (const page of crawled) {
                    const entry = {
                        url: page.url,
                        title: page.title,
                        tokens: page.tokens ?? 0,
                        content: page.markdown,
                        depth: page.depth,
                        parent: page.parent,
                        links: page.links,
                        elapsed: page.elapsed,
                    };
                    // error / fingerprint are included only when truthy.
                    if (page.error) {
                        entry.error = page.error;
                    }
                    if (page.fingerprint) {
                        entry.fingerprint = page.fingerprint;
                    }
                    pages.push(entry);
                }
                console.log(JSON.stringify({ pages, totalPages: crawled.length, totalTokens }, null, 2));
            }
            else {
                for (const [idx, page] of crawled.entries()) {
                    console.log(`\n${'='.repeat(60)}`);
                    console.log(`[${idx + 1}/${crawled.length}] ${page.title}`);
                    console.log(`URL: ${page.url}`);
                    console.log(`Depth: ${page.depth}${page.parent ? ` (from: ${page.parent})` : ''}`);
                    console.log(`Links found: ${page.links.length}`);
                    console.log(`Elapsed: ${page.elapsed}ms`);
                    if (page.error) {
                        console.log(`ERROR: ${page.error}`);
                        continue;
                    }
                    // Preview: first 500 characters, with an ellipsis only
                    // when the markdown was actually cut.
                    const ellipsis = page.markdown.length > 500 ? '...' : '';
                    console.log(`\n${page.markdown.slice(0, 500)}${ellipsis}`);
                }
            }
            await cleanup();
            process.exit(0);
        }
        catch (failure) {
            progress?.fail('Crawl failed');
            const detail = failure instanceof Error
                ? `\nError: ${failure.message}`
                : '\nError: Unknown error occurred';
            console.error(detail);
            await cleanup();
            process.exit(1);
        }
    });
|
|
638
|
+
// ── map command ───────────────────────────────────────────────────────────
|
|
639
|
+
// `map` command: calls mapDomain() from core/map.js for <url> and prints
// the discovered URLs. --json prints the whole result object; otherwise one
// URL per line goes to stdout and a summary (total, sitemaps used) goes to
// stderr unless --silent is set. Exits 0 on success, 1 on failure.
program
    .command('map <url>')
    .description('Discover all URLs on a domain (sitemap + crawl)')
    .option('--no-sitemap', 'Skip sitemap.xml discovery')
    .option('--no-crawl', 'Skip homepage crawl')
    .option('--max <n>', 'Maximum URLs to discover (default: 5000)', (raw) => parseInt(raw, 10), 5000)
    .option('--include <patterns...>', 'Include only URLs matching these regex patterns')
    .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (url, options) => {
        const { mapDomain } = await import('../../core/map.js');
        const progress = options.silent ? null : ora('Discovering URLs...').start();
        try {
            const discovery = await mapDomain(url, {
                // Both stay enabled unless the option is explicitly false.
                useSitemap: options.sitemap !== false,
                crawlHomepage: options.crawl !== false,
                maxUrls: options.max,
                includePatterns: options.include,
                excludePatterns: options.exclude,
            });
            progress?.succeed(`Found ${discovery.total} URLs in ${discovery.elapsed}ms`);
            if (options.json) {
                console.log(JSON.stringify(discovery, null, 2));
                process.exit(0);
            }
            for (const found of discovery.urls) {
                console.log(found);
            }
            // Summary goes to stderr, keeping stdout to one URL per line.
            if (!options.silent) {
                console.error(`\nTotal: ${discovery.total} URLs`);
                if (discovery.sitemapUrls.length > 0) {
                    console.error(`Sitemaps used: ${discovery.sitemapUrls.join(', ')}`);
                }
            }
            process.exit(0);
        }
        catch (failure) {
            progress?.fail('URL discovery failed');
            const reason = failure instanceof Error ? failure.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
685
|
+
// ── flights command ───────────────────────────────────────────────────────
|
|
686
|
+
// `flights` command: builds a Google Flights search URL from <query> and
// fetches it with peel() in render mode.
//   * With --dates, every date from parseDateRange() is fetched in turn and
//     the cheapest flight per date is shown as a table (or JSON), with the
//     lowest-priced date starred.
//   * Without --dates, the page content (or a JSON envelope with any
//     structured flights) is printed for the single query.
// NOTE(review): --count is declared below but never read in this action —
// confirm whether results were meant to be limited here.
program
    .command('flights <query>')
    .description('Search for flights (via Google Flights) — e.g. "NYC to Fort Myers Apr 4"')
    .option('--one-way', 'One-way flight (default)')
    .option('--round-trip', 'Round-trip flight')
    .option('-n, --count <n>', 'Max flights to show', '10')
    .option('--dates <range>', 'Compare prices across date range (e.g., "Mar29-Apr4")')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
        // ── --dates: compare cheapest flight across a date range ──────────────
        if (options.dates) {
            const dates = parseDateRange(options.dates);
            if (dates.length === 0) {
                console.error('Could not parse date range. Format: "Mar29-Apr4"');
                process.exit(1);
            }
            const spinner = options.silent ? null : ora(`Comparing flights across ${dates.length} dates...`).start();
            const tripType = options.roundTrip ? '' : ' one way';
            const rows = [];
            // Dates are fetched one at a time; a failure for one date is
            // recorded as an empty row instead of aborting the comparison.
            for (const date of dates) {
                if (spinner)
                    spinner.text = `Fetching flights for ${date}...`;
                try {
                    const dateQuery = `Flights from ${query} ${date}${tripType}`;
                    const encoded = encodeURIComponent(dateQuery);
                    const url = `https://www.google.com/travel/flights?q=${encoded}`;
                    const result = await peel(url, { render: true, timeout: 30000 });
                    // Try to extract cheapest flight from structured data or content
                    let price = null;
                    let airline = null;
                    let time = null;
                    const flights = result.domainData?.structured?.flights || [];
                    if (flights.length > 0) {
                        // Unparseable or missing prices compare as Infinity,
                        // so they never win.
                        const cheapest = flights.reduce((a, b) => {
                            const ap = parseFloat(String(a.price || '').replace(/[^0-9.]/g, '')) || Infinity;
                            const bp = parseFloat(String(b.price || '').replace(/[^0-9.]/g, '')) || Infinity;
                            return ap <= bp ? a : b;
                        });
                        price = cheapest.priceStr || (cheapest.price ? `$${cheapest.price}` : null);
                        airline = cheapest.airline || cheapest.carrier || null;
                        time = cheapest.departTime && cheapest.arriveTime
                            ? `${cheapest.departTime} → ${cheapest.arriveTime}`
                            : (cheapest.time || cheapest.departure || null);
                    }
                    else {
                        // Extract from markdown content — look for price patterns.
                        // The first alternative accepts thousands separators
                        // so "$1,234" is not truncated to "$1".
                        const priceMatch = result.content.match(/\$(\d{1,3}(?:,\d{3})+|\d+)/);
                        if (priceMatch)
                            price = `$${priceMatch[1]}`;
                        const airlineMatch = result.content.match(/\b(American|Delta|United|Southwest|Spirit|JetBlue|Alaska|Frontier|Allegiant|Sun Country)\b/i);
                        if (airlineMatch)
                            airline = airlineMatch[1];
                        const timeMatch = result.content.match(/(\d{1,2}:\d{2}\s*(?:AM|PM))\s*[–—→]\s*(\d{1,2}:\d{2}\s*(?:AM|PM))/i);
                        if (timeMatch)
                            time = `${timeMatch[1]} → ${timeMatch[2]}`;
                    }
                    // Numeric form used only for the best-price comparison.
                    const priceNum = price ? parseFloat(price.replace(/[^0-9.]/g, '')) || Infinity : Infinity;
                    rows.push({ date, price, airline, time, priceNum });
                }
                catch {
                    rows.push({ date, price: null, airline: null, time: null, priceNum: Infinity });
                }
            }
            if (spinner)
                spinner.succeed(`Compared ${rows.length} dates`);
            if (options.json) {
                console.log(JSON.stringify({ query, dateRange: options.dates, rows }, null, 2));
            }
            else {
                // Find best price
                const best = rows.reduce((a, b) => a.priceNum <= b.priceNum ? a : b);
                console.log(`\n# ✈️ Flight Price Comparison — ${query}\n`);
                console.log('| Date | Airline | Time | Price |');
                console.log('|------|---------|------|-------|');
                for (const row of rows) {
                    const star = row.priceNum === best.priceNum ? ' ⭐' : '';
                    const priceStr = row.price ? `${row.price}${star}` : 'N/A';
                    const airlineStr = row.airline || 'Unknown';
                    const timeStr = row.time || '—';
                    console.log(`| ${row.date} | ${airlineStr} | ${timeStr} | ${priceStr} |`);
                }
                if (best.price) {
                    console.log(`\n⭐ Best price: ${best.date} — ${best.airline || 'Unknown'} ${best.price}`);
                }
            }
            await cleanup();
            process.exit(0);
        }
        // ── Single date (default) ─────────────────────────────────────────────
        const tripType = options.roundTrip ? '' : ' one way';
        const encoded = encodeURIComponent(`Flights from ${query}${tripType}`);
        const url = `https://www.google.com/travel/flights?q=${encoded}`;
        const spinner = options.silent ? null : ora(`Searching flights: ${query}...`).start();
        try {
            // render is forced automatically by SPA auto-detect, but be explicit here
            const result = await peel(url, { render: true, timeout: 30000 });
            if (spinner)
                spinner.succeed('Flights loaded');
            if (options.json) {
                console.log(JSON.stringify({
                    query,
                    url,
                    flights: result.domainData?.structured?.flights || [],
                    source: 'Google Flights',
                    content: result.content,
                    tokens: result.tokens,
                }, null, 2));
            }
            else {
                console.log(result.content);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Flight search failed');
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
809
|
+
// ── rental command ────────────────────────────────────────────────────────
|
|
810
|
+
// Registers `rental <query>` (alias `car-rental`): builds a Kayak car-rental
// search URL from a free-text query such as "Punta Gorda FL Apr 1-3", fetches
// it with rendering enabled, and prints the result (raw content, or a JSON
// envelope with --json). The process exits with 0 on success and 1 on failure.
program
    .command('rental <query>')
    .alias('car-rental')
    .description('Search for car rentals via Kayak — e.g. "Punta Gorda FL Apr 1-3"')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
        // Parse location: strip date portion from query
        const location = query.replace(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+\d+.*/i, '').trim();
        const encodedLocation = encodeURIComponent(location.replace(/\s+/g, '-'));
        const months = {
            jan: '01', feb: '02', mar: '03', apr: '04', may: '05', jun: '06',
            jul: '07', aug: '08', sep: '09', oct: '10', nov: '11', dec: '12',
        };
        // Fallback window (April 1-3) used when the query carries no date range.
        let startMonth = '04';
        let startDay = '01';
        let endMonth = '04';
        let endDay = '03';
        // Parse dates: "Apr 1-3" or "Apr 1 to Apr 3". The separator is a dash
        // run or the literal word "to" (not a character class of '-', 't', 'o').
        const rangeMatch = query.match(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+(\d+)\s*(?:[-–]+|to)\s*(?:(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*\s+)?(\d+)/i);
        if (rangeMatch) {
            startMonth = months[rangeMatch[1].toLowerCase().slice(0, 3)];
            startDay = rangeMatch[2].padStart(2, '0');
            endMonth = rangeMatch[3] ? months[rangeMatch[3].toLowerCase().slice(0, 3)] : startMonth;
            endDay = rangeMatch[4].padStart(2, '0');
        }
        // Resolve years. Month/day pairs are compared as MMDD integers.
        const now = new Date();
        const monthDayKey = (month, day) => Number(month) * 100 + Number(day);
        const todayKey = monthDayKey(now.getMonth() + 1, now.getDate());
        const pickupKey = monthDayKey(startMonth, startDay);
        const returnKey = monthDayKey(endMonth, endDay);
        // A pickup day that has already passed this year means next year.
        const pickupYear = pickupKey < todayKey ? now.getFullYear() + 1 : now.getFullYear();
        // A return day earlier in the calendar than the pickup day means the
        // range crosses New Year (e.g. "Dec 30 - Jan 2").
        const returnYear = returnKey < pickupKey ? pickupYear + 1 : pickupYear;
        const pickupDate = `${pickupYear}-${startMonth}-${startDay}`;
        const returnDate = `${returnYear}-${endMonth}-${endDay}`;
        const searchUrl = `https://www.kayak.com/cars/${encodedLocation}/${pickupDate}/${returnDate}?sort=price_a`;
        const spinner = options.silent ? null : (await import('ora')).default(`Searching car rentals: ${query}...`).start();
        try {
            const result = await peel(searchUrl, { render: true, timeout: 40000 });
            if (spinner)
                spinner.succeed('Car rentals loaded');
            if (options.json) {
                console.log(JSON.stringify({
                    query,
                    location,
                    pickupDate,
                    returnDate,
                    url: searchUrl,
                    content: result.content,
                    tokens: result.tokens,
                }, null, 2));
            }
            else {
                console.log(result.content);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Car rental search failed');
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
868
|
+
// ── cars command ──────────────────────────────────────────────────────────
|
|
869
|
+
// Registers `cars <query>`: builds a Cars.com shopping-results URL from the
// keyword plus the --zip / --distance / --max-price / --min-price options,
// fetches it, and prints the result (raw content, or a JSON envelope with
// --json). The process exits with 0 on success and 1 on failure.
program
    .command('cars <query>')
    .description('Search for cars to buy via Cars.com — e.g. "Honda Civic"')
    .option('--zip <zip>', 'ZIP code for local search', '10001')
    .option('--distance <miles>', 'Max distance in miles', '30')
    .option('--max-price <price>', 'Maximum listing price')
    .option('--min-price <price>', 'Minimum listing price')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (query, options) => {
        // `||` also replaces an explicitly empty value (e.g. --zip "") with the default.
        const zip = options.zip || '10001';
        const distance = options.distance || '30';
        const maxPrice = options.maxPrice || '';
        const minPrice = options.minPrice || '';
        const params = new URLSearchParams({
            keyword: query,
            sort: 'list_price',
            stock_type: 'all',
            zip,
            maximum_distance: distance,
        });
        // Price bounds are only sent when the user supplied them.
        if (maxPrice)
            params.set('list_price_max', maxPrice);
        if (minPrice)
            params.set('list_price_min', minPrice);
        const url = `https://www.cars.com/shopping/results/?${params.toString()}`;
        const spinner = options.silent ? null : (await import('ora')).default(`Searching cars: ${query}...`).start();
        try {
            const result = await peel(url, { timeout: 25000 });
            if (spinner)
                spinner.succeed('Cars loaded');
            if (options.json) {
                console.log(JSON.stringify({
                    query,
                    zip,
                    distance,
                    maxPrice,
                    // Echo the lower bound too, so the JSON reflects every filter applied to the URL.
                    minPrice,
                    url,
                    content: result.content,
                    tokens: result.tokens,
                }, null, 2));
            }
            else {
                console.log(result.content);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Car search failed');
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
925
|
+
// ── extractors command ────────────────────────────────────────────────────
|
|
926
|
+
// Registers `extractors` (alias `list-extractors`): prints the built-in table
// of supported domain extractors, either as a JSON array (--json) or as a
// human-readable listing grouped by category in first-seen order.
program
    .command('extractors')
    .alias('list-extractors')
    .description('List all supported domain extractors')
    .option('--json', 'Output as JSON')
    .action((options) => {
        const extractors = [
            // Social
            { domain: 'twitter.com / x.com', category: 'Social', description: 'Tweets, threads, profiles' },
            { domain: 'reddit.com', category: 'Social', description: 'Subreddits, posts, comments' },
            { domain: 'instagram.com', category: 'Social', description: 'Photos, reels, profiles' },
            { domain: 'tiktok.com', category: 'Social', description: 'Video metadata, captions' },
            { domain: 'pinterest.com', category: 'Social', description: 'Pins, boards' },
            { domain: 'linkedin.com', category: 'Social', description: 'Profiles, job listings' },
            { domain: 'facebook.com', category: 'Social', description: 'Marketplace listings' },
            // Video / Audio
            { domain: 'youtube.com', category: 'Video', description: 'Transcripts, metadata, comments' },
            { domain: 'twitch.tv', category: 'Video', description: 'Streams, clips, channel info' },
            { domain: 'soundcloud.com', category: 'Audio', description: 'Tracks, playlists' },
            { domain: 'open.spotify.com', category: 'Audio', description: 'Tracks, albums, playlists' },
            // Tech / Dev
            { domain: 'github.com', category: 'Dev', description: 'Repos, issues, PRs, code' },
            { domain: 'stackoverflow.com', category: 'Dev', description: 'Questions, answers' },
            { domain: 'npmjs.com', category: 'Dev', description: 'Package metadata, readme' },
            { domain: 'pypi.org', category: 'Dev', description: 'Package metadata, readme' },
            { domain: 'dev.to', category: 'Dev', description: 'Articles, comments' },
            // News / Articles
            { domain: 'news.ycombinator.com', category: 'News', description: 'HN posts, comments, Ask/Show HN' },
            { domain: 'medium.com', category: 'Articles', description: 'Articles, publications' },
            { domain: 'substack.com / *.substack.com', category: 'Articles', description: 'Newsletters, posts' },
            { domain: 'nytimes.com', category: 'News', description: 'Articles, headlines' },
            { domain: 'bbc.com', category: 'News', description: 'Articles, headlines' },
            { domain: 'cnn.com', category: 'News', description: 'Articles, headlines' },
            // Shopping / E-commerce
            { domain: 'amazon.com', category: 'Shopping', description: 'Products, prices, reviews' },
            { domain: 'bestbuy.com', category: 'Shopping', description: 'Products, prices, specs' },
            { domain: 'walmart.com', category: 'Shopping', description: 'Products, prices' },
            { domain: 'ebay.com', category: 'Shopping', description: 'Listings, prices' },
            { domain: 'etsy.com', category: 'Shopping', description: 'Handmade listings' },
            // Local / Real Estate
            { domain: 'yelp.com', category: 'Local', description: 'Business info, reviews (needs YELP_API_KEY)' },
            { domain: 'craigslist.org', category: 'Local', description: 'Listings, classifieds' },
            { domain: 'zillow.com', category: 'Real Estate', description: 'Property listings, estimates' },
            { domain: 'redfin.com', category: 'Real Estate', description: 'Property listings, prices' },
            { domain: 'cars.com', category: 'Automotive', description: 'Car listings, prices' },
            // Knowledge / Academic
            { domain: 'en.wikipedia.org', category: 'Knowledge', description: 'Articles, structured data' },
            { domain: 'arxiv.org', category: 'Academic', description: 'Papers, abstracts, metadata' },
            { domain: 'semanticscholar.org', category: 'Academic', description: 'Papers, citations' },
            { domain: 'pubmed.ncbi.nlm.nih.gov', category: 'Academic', description: 'Medical papers, abstracts' },
            { domain: 'imdb.com', category: 'Knowledge', description: 'Movies, TV shows, cast' },
            { domain: 'allrecipes.com', category: 'Knowledge', description: 'Recipes, ingredients, steps' },
            // Finance / Markets
            { domain: 'polymarket.com', category: 'Finance', description: 'Prediction markets' },
            { domain: 'kalshi.com', category: 'Finance', description: 'Prediction markets' },
            { domain: 'tradingview.com', category: 'Finance', description: 'Charts, indicators, ideas' },
            { domain: 'coingecko.com', category: 'Finance', description: 'Crypto prices, market data' },
            { domain: 'coinmarketcap.com', category: 'Finance', description: 'Crypto prices, market data' },
            // Sports / Betting
            { domain: 'espn.com', category: 'Sports', description: 'Scores, stats, news' },
            { domain: 'draftkings.com', category: 'Betting', description: 'Odds, lines' },
            { domain: 'fanduel.com', category: 'Betting', description: 'Odds, lines' },
            { domain: 'betmgm.com', category: 'Betting', description: 'Odds, lines' },
            // Entertainment
            { domain: 'producthunt.com', category: 'Tech', description: 'Product launches, upvotes' },
            // Documents
            { domain: '*.pdf URLs', category: 'Documents', description: 'PDF text extraction' },
            // Weather
            { domain: 'weather.com', category: 'Weather', description: 'Forecasts, conditions' },
            { domain: 'accuweather.com', category: 'Weather', description: 'Forecasts, conditions' },
            { domain: 'api.open-meteo.com', category: 'Weather', description: 'Free weather API' },
        ];
        // Machine-readable mode: dump the flat list and stop.
        if (options.json) {
            console.log(JSON.stringify(extractors, null, 2));
            return;
        }
        // Bucket entries per category; a Map keeps categories in first-seen order.
        const DOMAIN_COLUMN_WIDTH = 35;
        const grouped = extractors.reduce((buckets, entry) => {
            const bucket = buckets.get(entry.category);
            if (bucket === undefined) {
                buckets.set(entry.category, [entry]);
            }
            else {
                bucket.push(entry);
            }
            return buckets;
        }, new Map());
        console.log(`\n🔌 WebPeel Domain Extractors (${extractors.length} total)\n`);
        grouped.forEach((entries, categoryName) => {
            console.log(` ${categoryName}`);
            entries.forEach(({ domain, description }) => {
                // Pad the domain so descriptions line up in a column.
                console.log(` ${domain.padEnd(DOMAIN_COLUMN_WIDTH)} ${description}`);
            });
            console.log('');
        });
        console.log(' Run `webpeel <url>` to use these automatically based on the URL.');
    });
|
|
1021
|
+
}
|