webpeel 0.19.4 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cache.d.ts +0 -1
- package/dist/cache.js +0 -1
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +476 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1015 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +839 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +273 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +524 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +686 -0
- package/dist/cli-auth.d.ts +0 -1
- package/dist/cli-auth.js +0 -1
- package/dist/cli.d.ts +7 -6
- package/dist/cli.js +35 -4698
- package/dist/core/actions.d.ts +0 -1
- package/dist/core/actions.js +0 -1
- package/dist/core/agent.d.ts +0 -1
- package/dist/core/agent.js +9 -12
- package/dist/core/answer.d.ts +0 -1
- package/dist/core/answer.js +0 -1
- package/dist/core/application-tracker.d.ts +0 -1
- package/dist/core/application-tracker.js +0 -1
- package/dist/core/apply.d.ts +0 -1
- package/dist/core/apply.js +0 -1
- package/dist/core/auto-extract.d.ts +0 -1
- package/dist/core/auto-extract.js +0 -1
- package/dist/core/auto-interact.d.ts +0 -1
- package/dist/core/auto-interact.js +0 -1
- package/dist/core/bm25-filter.d.ts +0 -1
- package/dist/core/bm25-filter.js +0 -1
- package/dist/core/branding.d.ts +0 -1
- package/dist/core/branding.js +0 -1
- package/dist/core/browser-fetch.d.ts +0 -1
- package/dist/core/browser-fetch.js +17 -10
- package/dist/core/browser-pool.d.ts +0 -1
- package/dist/core/browser-pool.js +0 -1
- package/dist/core/budget.d.ts +0 -1
- package/dist/core/budget.js +0 -1
- package/dist/core/cache.d.ts +0 -1
- package/dist/core/cache.js +0 -1
- package/dist/core/cf-worker-proxy.d.ts +0 -1
- package/dist/core/cf-worker-proxy.js +0 -1
- package/dist/core/challenge-detection.d.ts +0 -1
- package/dist/core/challenge-detection.js +0 -1
- package/dist/core/change-tracking.d.ts +0 -1
- package/dist/core/change-tracking.js +0 -1
- package/dist/core/chunker.d.ts +0 -1
- package/dist/core/chunker.js +0 -1
- package/dist/core/chunking.d.ts +0 -1
- package/dist/core/chunking.js +0 -1
- package/dist/core/cloak-fetch.d.ts +0 -1
- package/dist/core/cloak-fetch.js +0 -1
- package/dist/core/content-pruner.d.ts +0 -1
- package/dist/core/content-pruner.js +0 -1
- package/dist/core/crawl-checkpoint.d.ts +0 -1
- package/dist/core/crawl-checkpoint.js +0 -1
- package/dist/core/crawler.d.ts +0 -1
- package/dist/core/crawler.js +6 -5
- package/dist/core/cycle-fetch.d.ts +0 -1
- package/dist/core/cycle-fetch.js +0 -1
- package/dist/core/deep-fetch.d.ts +0 -1
- package/dist/core/deep-fetch.js +0 -1
- package/dist/core/design-analysis.d.ts +0 -1
- package/dist/core/design-analysis.js +0 -1
- package/dist/core/design-compare.d.ts +0 -1
- package/dist/core/design-compare.js +0 -1
- package/dist/core/diff.d.ts +0 -1
- package/dist/core/diff.js +0 -1
- package/dist/core/dns-cache.d.ts +0 -1
- package/dist/core/dns-cache.js +0 -1
- package/dist/core/documents.d.ts +0 -1
- package/dist/core/documents.js +0 -1
- package/dist/core/domain-extractors.d.ts +0 -1
- package/dist/core/domain-extractors.js +0 -1
- package/dist/core/extract-inline.d.ts +0 -1
- package/dist/core/extract-inline.js +0 -1
- package/dist/core/extract-listings.d.ts +0 -1
- package/dist/core/extract-listings.js +0 -1
- package/dist/core/extract.d.ts +0 -1
- package/dist/core/extract.js +0 -1
- package/dist/core/fetcher.d.ts +0 -1
- package/dist/core/fetcher.js +0 -1
- package/dist/core/google-cache.d.ts +0 -1
- package/dist/core/google-cache.js +0 -1
- package/dist/core/hotel-search.d.ts +0 -1
- package/dist/core/hotel-search.js +0 -1
- package/dist/core/http-fetch.d.ts +0 -1
- package/dist/core/http-fetch.js +5 -7
- package/dist/core/human.d.ts +0 -1
- package/dist/core/human.js +0 -1
- package/dist/core/jobs.d.ts +0 -1
- package/dist/core/jobs.js +0 -1
- package/dist/core/json-ld.d.ts +0 -1
- package/dist/core/json-ld.js +0 -1
- package/dist/core/llm-extract.d.ts +0 -1
- package/dist/core/llm-extract.js +0 -1
- package/dist/core/logger.d.ts +17 -0
- package/dist/core/logger.js +44 -0
- package/dist/core/map.d.ts +0 -1
- package/dist/core/map.js +0 -1
- package/dist/core/markdown.d.ts +0 -1
- package/dist/core/markdown.js +0 -1
- package/dist/core/metadata.d.ts +0 -1
- package/dist/core/metadata.js +0 -1
- package/dist/core/paginate.d.ts +0 -1
- package/dist/core/paginate.js +0 -1
- package/dist/core/pdf.d.ts +0 -1
- package/dist/core/pdf.js +0 -1
- package/dist/core/peel-tls.d.ts +0 -1
- package/dist/core/peel-tls.js +0 -1
- package/dist/core/pipeline.d.ts +0 -1
- package/dist/core/pipeline.js +22 -25
- package/dist/core/profiles.d.ts +0 -1
- package/dist/core/profiles.js +0 -1
- package/dist/core/quick-answer.d.ts +0 -1
- package/dist/core/quick-answer.js +0 -1
- package/dist/core/rate-governor.d.ts +0 -1
- package/dist/core/rate-governor.js +0 -1
- package/dist/core/readability.d.ts +0 -1
- package/dist/core/readability.js +0 -1
- package/dist/core/research.d.ts +0 -1
- package/dist/core/research.js +0 -1
- package/dist/core/schema-extraction.d.ts +0 -1
- package/dist/core/schema-extraction.js +0 -1
- package/dist/core/schema-postprocess.d.ts +0 -1
- package/dist/core/schema-postprocess.js +0 -1
- package/dist/core/schema-templates.d.ts +0 -1
- package/dist/core/schema-templates.js +0 -1
- package/dist/core/screenshot.d.ts +0 -1
- package/dist/core/screenshot.js +0 -1
- package/dist/core/search-fallback.d.ts +0 -1
- package/dist/core/search-fallback.js +0 -1
- package/dist/core/search-provider.d.ts +0 -1
- package/dist/core/search-provider.js +18 -21
- package/dist/core/site-search.d.ts +0 -1
- package/dist/core/site-search.js +0 -1
- package/dist/core/sitemap.d.ts +0 -1
- package/dist/core/sitemap.js +0 -1
- package/dist/core/stealth-patches.d.ts +0 -1
- package/dist/core/stealth-patches.js +0 -1
- package/dist/core/stemmer.d.ts +0 -1
- package/dist/core/stemmer.js +0 -1
- package/dist/core/strategies.d.ts +6 -1
- package/dist/core/strategies.js +29 -41
- package/dist/core/strategy-hooks.d.ts +0 -1
- package/dist/core/strategy-hooks.js +0 -1
- package/dist/core/summarize.d.ts +0 -1
- package/dist/core/summarize.js +0 -1
- package/dist/core/synonyms.d.ts +0 -1
- package/dist/core/synonyms.js +0 -1
- package/dist/core/table-format.d.ts +0 -1
- package/dist/core/table-format.js +0 -1
- package/dist/core/timing.d.ts +0 -1
- package/dist/core/timing.js +0 -1
- package/dist/core/user-agents.d.ts +0 -1
- package/dist/core/user-agents.js +0 -1
- package/dist/core/watch-manager.d.ts +0 -1
- package/dist/core/watch-manager.js +0 -1
- package/dist/core/watch.d.ts +0 -1
- package/dist/core/watch.js +0 -1
- package/dist/core/youtube.d.ts +0 -1
- package/dist/core/youtube.js +0 -1
- package/dist/index.d.ts +8 -3
- package/dist/index.js +27 -3
- package/dist/integrations/index.d.ts +0 -1
- package/dist/integrations/index.js +0 -1
- package/dist/integrations/langchain.d.ts +0 -1
- package/dist/integrations/langchain.js +0 -1
- package/dist/integrations/llamaindex.d.ts +0 -1
- package/dist/integrations/llamaindex.js +0 -1
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +266 -0
- package/dist/mcp/handlers/extract.d.ts +6 -0
- package/dist/mcp/handlers/extract.js +102 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +61 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +31 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +63 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +3 -4
- package/dist/mcp/server.js +35 -1101
- package/dist/mcp/smart-router.d.ts +0 -1
- package/dist/mcp/smart-router.js +3 -1
- package/dist/types.d.ts +6 -1
- package/dist/types.js +0 -1
- package/package.json +3 -13
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/cli-auth.d.ts.map +0 -1
- package/dist/cli-auth.js.map +0 -1
- package/dist/cli.bundle.cjs +0 -159248
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/core/actions.d.ts.map +0 -1
- package/dist/core/actions.js.map +0 -1
- package/dist/core/agent.d.ts.map +0 -1
- package/dist/core/agent.js.map +0 -1
- package/dist/core/answer.d.ts.map +0 -1
- package/dist/core/answer.js.map +0 -1
- package/dist/core/application-tracker.d.ts.map +0 -1
- package/dist/core/application-tracker.js.map +0 -1
- package/dist/core/apply.d.ts.map +0 -1
- package/dist/core/apply.js.map +0 -1
- package/dist/core/auto-extract.d.ts.map +0 -1
- package/dist/core/auto-extract.js.map +0 -1
- package/dist/core/auto-interact.d.ts.map +0 -1
- package/dist/core/auto-interact.js.map +0 -1
- package/dist/core/bm25-filter.d.ts.map +0 -1
- package/dist/core/bm25-filter.js.map +0 -1
- package/dist/core/branding.d.ts.map +0 -1
- package/dist/core/branding.js.map +0 -1
- package/dist/core/browser-fetch.d.ts.map +0 -1
- package/dist/core/browser-fetch.js.map +0 -1
- package/dist/core/browser-pool.d.ts.map +0 -1
- package/dist/core/browser-pool.js.map +0 -1
- package/dist/core/budget.d.ts.map +0 -1
- package/dist/core/budget.js.map +0 -1
- package/dist/core/cache.d.ts.map +0 -1
- package/dist/core/cache.js.map +0 -1
- package/dist/core/cf-worker-proxy.d.ts.map +0 -1
- package/dist/core/cf-worker-proxy.js.map +0 -1
- package/dist/core/challenge-detection.d.ts.map +0 -1
- package/dist/core/challenge-detection.js.map +0 -1
- package/dist/core/change-tracking.d.ts.map +0 -1
- package/dist/core/change-tracking.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/chunking.d.ts.map +0 -1
- package/dist/core/chunking.js.map +0 -1
- package/dist/core/cloak-fetch.d.ts.map +0 -1
- package/dist/core/cloak-fetch.js.map +0 -1
- package/dist/core/content-pruner.d.ts.map +0 -1
- package/dist/core/content-pruner.js.map +0 -1
- package/dist/core/crawl-checkpoint.d.ts.map +0 -1
- package/dist/core/crawl-checkpoint.js.map +0 -1
- package/dist/core/crawler.d.ts.map +0 -1
- package/dist/core/crawler.js.map +0 -1
- package/dist/core/cycle-fetch.d.ts.map +0 -1
- package/dist/core/cycle-fetch.js.map +0 -1
- package/dist/core/deep-fetch.d.ts.map +0 -1
- package/dist/core/deep-fetch.js.map +0 -1
- package/dist/core/design-analysis.d.ts.map +0 -1
- package/dist/core/design-analysis.js.map +0 -1
- package/dist/core/design-compare.d.ts.map +0 -1
- package/dist/core/design-compare.js.map +0 -1
- package/dist/core/diff.d.ts.map +0 -1
- package/dist/core/diff.js.map +0 -1
- package/dist/core/dns-cache.d.ts.map +0 -1
- package/dist/core/dns-cache.js.map +0 -1
- package/dist/core/documents.d.ts.map +0 -1
- package/dist/core/documents.js.map +0 -1
- package/dist/core/domain-extractors.d.ts.map +0 -1
- package/dist/core/domain-extractors.js.map +0 -1
- package/dist/core/extract-inline.d.ts.map +0 -1
- package/dist/core/extract-inline.js.map +0 -1
- package/dist/core/extract-listings.d.ts.map +0 -1
- package/dist/core/extract-listings.js.map +0 -1
- package/dist/core/extract.d.ts.map +0 -1
- package/dist/core/extract.js.map +0 -1
- package/dist/core/fetcher.d.ts.map +0 -1
- package/dist/core/fetcher.js.map +0 -1
- package/dist/core/google-cache.d.ts.map +0 -1
- package/dist/core/google-cache.js.map +0 -1
- package/dist/core/hotel-search.d.ts.map +0 -1
- package/dist/core/hotel-search.js.map +0 -1
- package/dist/core/http-fetch.d.ts.map +0 -1
- package/dist/core/http-fetch.js.map +0 -1
- package/dist/core/human.d.ts.map +0 -1
- package/dist/core/human.js.map +0 -1
- package/dist/core/jobs.d.ts.map +0 -1
- package/dist/core/jobs.js.map +0 -1
- package/dist/core/json-ld.d.ts.map +0 -1
- package/dist/core/json-ld.js.map +0 -1
- package/dist/core/llm-extract.d.ts.map +0 -1
- package/dist/core/llm-extract.js.map +0 -1
- package/dist/core/map.d.ts.map +0 -1
- package/dist/core/map.js.map +0 -1
- package/dist/core/markdown.d.ts.map +0 -1
- package/dist/core/markdown.js.map +0 -1
- package/dist/core/metadata.d.ts.map +0 -1
- package/dist/core/metadata.js.map +0 -1
- package/dist/core/paginate.d.ts.map +0 -1
- package/dist/core/paginate.js.map +0 -1
- package/dist/core/pdf.d.ts.map +0 -1
- package/dist/core/pdf.js.map +0 -1
- package/dist/core/peel-tls.d.ts.map +0 -1
- package/dist/core/peel-tls.js.map +0 -1
- package/dist/core/pipeline.d.ts.map +0 -1
- package/dist/core/pipeline.js.map +0 -1
- package/dist/core/profiles.d.ts.map +0 -1
- package/dist/core/profiles.js.map +0 -1
- package/dist/core/quick-answer.d.ts.map +0 -1
- package/dist/core/quick-answer.js.map +0 -1
- package/dist/core/rate-governor.d.ts.map +0 -1
- package/dist/core/rate-governor.js.map +0 -1
- package/dist/core/readability.d.ts.map +0 -1
- package/dist/core/readability.js.map +0 -1
- package/dist/core/research.d.ts.map +0 -1
- package/dist/core/research.js.map +0 -1
- package/dist/core/schema-extraction.d.ts.map +0 -1
- package/dist/core/schema-extraction.js.map +0 -1
- package/dist/core/schema-postprocess.d.ts.map +0 -1
- package/dist/core/schema-postprocess.js.map +0 -1
- package/dist/core/schema-templates.d.ts.map +0 -1
- package/dist/core/schema-templates.js.map +0 -1
- package/dist/core/screenshot.d.ts.map +0 -1
- package/dist/core/screenshot.js.map +0 -1
- package/dist/core/search-fallback.d.ts.map +0 -1
- package/dist/core/search-fallback.js.map +0 -1
- package/dist/core/search-provider.d.ts.map +0 -1
- package/dist/core/search-provider.js.map +0 -1
- package/dist/core/site-search.d.ts.map +0 -1
- package/dist/core/site-search.js.map +0 -1
- package/dist/core/sitemap.d.ts.map +0 -1
- package/dist/core/sitemap.js.map +0 -1
- package/dist/core/stealth-patches.d.ts.map +0 -1
- package/dist/core/stealth-patches.js.map +0 -1
- package/dist/core/stemmer.d.ts.map +0 -1
- package/dist/core/stemmer.js.map +0 -1
- package/dist/core/strategies.d.ts.map +0 -1
- package/dist/core/strategies.js.map +0 -1
- package/dist/core/strategy-hooks.d.ts.map +0 -1
- package/dist/core/strategy-hooks.js.map +0 -1
- package/dist/core/summarize.d.ts.map +0 -1
- package/dist/core/summarize.js.map +0 -1
- package/dist/core/synonyms.d.ts.map +0 -1
- package/dist/core/synonyms.js.map +0 -1
- package/dist/core/table-format.d.ts.map +0 -1
- package/dist/core/table-format.js.map +0 -1
- package/dist/core/timing.d.ts.map +0 -1
- package/dist/core/timing.js.map +0 -1
- package/dist/core/user-agents.d.ts.map +0 -1
- package/dist/core/user-agents.js.map +0 -1
- package/dist/core/watch-manager.d.ts.map +0 -1
- package/dist/core/watch-manager.js.map +0 -1
- package/dist/core/watch.d.ts.map +0 -1
- package/dist/core/watch.js.map +0 -1
- package/dist/core/youtube.d.ts.map +0 -1
- package/dist/core/youtube.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/integrations/index.d.ts.map +0 -1
- package/dist/integrations/index.js.map +0 -1
- package/dist/integrations/langchain.d.ts.map +0 -1
- package/dist/integrations/langchain.js.map +0 -1
- package/dist/integrations/llamaindex.d.ts.map +0 -1
- package/dist/integrations/llamaindex.js.map +0 -1
- package/dist/mcp/server.d.ts.map +0 -1
- package/dist/mcp/server.js.map +0 -1
- package/dist/mcp/smart-router.d.ts.map +0 -1
- package/dist/mcp/smart-router.js.map +0 -1
- package/dist/server/app.d.ts +0 -15
- package/dist/server/app.d.ts.map +0 -1
- package/dist/server/app.js +0 -350
- package/dist/server/app.js.map +0 -1
- package/dist/server/auth-store.d.ts +0 -28
- package/dist/server/auth-store.d.ts.map +0 -1
- package/dist/server/auth-store.js +0 -89
- package/dist/server/auth-store.js.map +0 -1
- package/dist/server/email-service.d.ts +0 -22
- package/dist/server/email-service.d.ts.map +0 -1
- package/dist/server/email-service.js +0 -80
- package/dist/server/email-service.js.map +0 -1
- package/dist/server/job-queue.d.ts +0 -93
- package/dist/server/job-queue.d.ts.map +0 -1
- package/dist/server/job-queue.js +0 -146
- package/dist/server/job-queue.js.map +0 -1
- package/dist/server/logger.d.ts +0 -11
- package/dist/server/logger.d.ts.map +0 -1
- package/dist/server/logger.js +0 -38
- package/dist/server/logger.js.map +0 -1
- package/dist/server/middleware/auth.d.ts +0 -29
- package/dist/server/middleware/auth.d.ts.map +0 -1
- package/dist/server/middleware/auth.js +0 -222
- package/dist/server/middleware/auth.js.map +0 -1
- package/dist/server/middleware/rate-limit.d.ts +0 -25
- package/dist/server/middleware/rate-limit.d.ts.map +0 -1
- package/dist/server/middleware/rate-limit.js +0 -168
- package/dist/server/middleware/rate-limit.js.map +0 -1
- package/dist/server/middleware/url-validator.d.ts +0 -16
- package/dist/server/middleware/url-validator.d.ts.map +0 -1
- package/dist/server/middleware/url-validator.js +0 -187
- package/dist/server/middleware/url-validator.js.map +0 -1
- package/dist/server/openapi.yaml +0 -4944
- package/dist/server/pg-auth-store.d.ts +0 -133
- package/dist/server/pg-auth-store.d.ts.map +0 -1
- package/dist/server/pg-auth-store.js +0 -473
- package/dist/server/pg-auth-store.js.map +0 -1
- package/dist/server/pg-job-queue.d.ts +0 -60
- package/dist/server/pg-job-queue.d.ts.map +0 -1
- package/dist/server/pg-job-queue.js +0 -365
- package/dist/server/pg-job-queue.js.map +0 -1
- package/dist/server/premium/domain-intel.d.ts +0 -17
- package/dist/server/premium/domain-intel.d.ts.map +0 -1
- package/dist/server/premium/domain-intel.js +0 -134
- package/dist/server/premium/domain-intel.js.map +0 -1
- package/dist/server/premium/index.d.ts +0 -18
- package/dist/server/premium/index.d.ts.map +0 -1
- package/dist/server/premium/index.js +0 -36
- package/dist/server/premium/index.js.map +0 -1
- package/dist/server/premium/swr-cache.d.ts +0 -15
- package/dist/server/premium/swr-cache.d.ts.map +0 -1
- package/dist/server/premium/swr-cache.js +0 -35
- package/dist/server/premium/swr-cache.js.map +0 -1
- package/dist/server/routes/activity.d.ts +0 -7
- package/dist/server/routes/activity.d.ts.map +0 -1
- package/dist/server/routes/activity.js +0 -68
- package/dist/server/routes/activity.js.map +0 -1
- package/dist/server/routes/agent.d.ts +0 -16
- package/dist/server/routes/agent.d.ts.map +0 -1
- package/dist/server/routes/agent.js +0 -247
- package/dist/server/routes/agent.js.map +0 -1
- package/dist/server/routes/answer.d.ts +0 -6
- package/dist/server/routes/answer.d.ts.map +0 -1
- package/dist/server/routes/answer.js +0 -133
- package/dist/server/routes/answer.js.map +0 -1
- package/dist/server/routes/ask.d.ts +0 -23
- package/dist/server/routes/ask.d.ts.map +0 -1
- package/dist/server/routes/ask.js +0 -119
- package/dist/server/routes/ask.js.map +0 -1
- package/dist/server/routes/batch.d.ts +0 -7
- package/dist/server/routes/batch.d.ts.map +0 -1
- package/dist/server/routes/batch.js +0 -412
- package/dist/server/routes/batch.js.map +0 -1
- package/dist/server/routes/cli-usage.d.ts +0 -7
- package/dist/server/routes/cli-usage.d.ts.map +0 -1
- package/dist/server/routes/cli-usage.js +0 -121
- package/dist/server/routes/cli-usage.js.map +0 -1
- package/dist/server/routes/compat.d.ts +0 -24
- package/dist/server/routes/compat.d.ts.map +0 -1
- package/dist/server/routes/compat.js +0 -653
- package/dist/server/routes/compat.js.map +0 -1
- package/dist/server/routes/deep-fetch.d.ts +0 -9
- package/dist/server/routes/deep-fetch.d.ts.map +0 -1
- package/dist/server/routes/deep-fetch.js +0 -50
- package/dist/server/routes/deep-fetch.js.map +0 -1
- package/dist/server/routes/demo.d.ts +0 -25
- package/dist/server/routes/demo.d.ts.map +0 -1
- package/dist/server/routes/demo.js +0 -434
- package/dist/server/routes/demo.js.map +0 -1
- package/dist/server/routes/extract.d.ts +0 -9
- package/dist/server/routes/extract.d.ts.map +0 -1
- package/dist/server/routes/extract.js +0 -150
- package/dist/server/routes/extract.js.map +0 -1
- package/dist/server/routes/fetch.d.ts +0 -8
- package/dist/server/routes/fetch.d.ts.map +0 -1
- package/dist/server/routes/fetch.js +0 -988
- package/dist/server/routes/fetch.js.map +0 -1
- package/dist/server/routes/health.d.ts +0 -8
- package/dist/server/routes/health.d.ts.map +0 -1
- package/dist/server/routes/health.js +0 -20
- package/dist/server/routes/health.js.map +0 -1
- package/dist/server/routes/jobs.d.ts +0 -8
- package/dist/server/routes/jobs.d.ts.map +0 -1
- package/dist/server/routes/jobs.js +0 -487
- package/dist/server/routes/jobs.js.map +0 -1
- package/dist/server/routes/mcp.d.ts +0 -18
- package/dist/server/routes/mcp.d.ts.map +0 -1
- package/dist/server/routes/mcp.js +0 -1260
- package/dist/server/routes/mcp.js.map +0 -1
- package/dist/server/routes/oauth.d.ts +0 -10
- package/dist/server/routes/oauth.d.ts.map +0 -1
- package/dist/server/routes/oauth.js +0 -334
- package/dist/server/routes/oauth.js.map +0 -1
- package/dist/server/routes/quick-answer.d.ts +0 -9
- package/dist/server/routes/quick-answer.d.ts.map +0 -1
- package/dist/server/routes/quick-answer.js +0 -93
- package/dist/server/routes/quick-answer.js.map +0 -1
- package/dist/server/routes/screenshot.d.ts +0 -23
- package/dist/server/routes/screenshot.d.ts.map +0 -1
- package/dist/server/routes/screenshot.js +0 -819
- package/dist/server/routes/screenshot.js.map +0 -1
- package/dist/server/routes/search.d.ts +0 -7
- package/dist/server/routes/search.d.ts.map +0 -1
- package/dist/server/routes/search.js +0 -312
- package/dist/server/routes/search.js.map +0 -1
- package/dist/server/routes/session.d.ts +0 -16
- package/dist/server/routes/session.d.ts.map +0 -1
- package/dist/server/routes/session.js +0 -278
- package/dist/server/routes/session.js.map +0 -1
- package/dist/server/routes/stats.d.ts +0 -7
- package/dist/server/routes/stats.d.ts.map +0 -1
- package/dist/server/routes/stats.js +0 -65
- package/dist/server/routes/stats.js.map +0 -1
- package/dist/server/routes/stripe.d.ts +0 -16
- package/dist/server/routes/stripe.d.ts.map +0 -1
- package/dist/server/routes/stripe.js +0 -283
- package/dist/server/routes/stripe.js.map +0 -1
- package/dist/server/routes/users.d.ts +0 -9
- package/dist/server/routes/users.d.ts.map +0 -1
- package/dist/server/routes/users.js +0 -1211
- package/dist/server/routes/users.js.map +0 -1
- package/dist/server/routes/watch.d.ts +0 -16
- package/dist/server/routes/watch.d.ts.map +0 -1
- package/dist/server/routes/watch.js +0 -257
- package/dist/server/routes/watch.js.map +0 -1
- package/dist/server/routes/webhooks.d.ts +0 -16
- package/dist/server/routes/webhooks.d.ts.map +0 -1
- package/dist/server/routes/webhooks.js +0 -74
- package/dist/server/routes/webhooks.js.map +0 -1
- package/dist/server/routes/youtube.d.ts +0 -7
- package/dist/server/routes/youtube.d.ts.map +0 -1
- package/dist/server/routes/youtube.js +0 -93
- package/dist/server/routes/youtube.js.map +0 -1
- package/dist/server/sentry.d.ts +0 -14
- package/dist/server/sentry.d.ts.map +0 -1
- package/dist/server/sentry.js +0 -39
- package/dist/server/sentry.js.map +0 -1
- package/dist/server/types.d.ts +0 -16
- package/dist/server/types.d.ts.map +0 -1
- package/dist/server/types.js +0 -8
- package/dist/server/types.js.map +0 -1
- package/dist/server/utils/response.d.ts +0 -45
- package/dist/server/utils/response.d.ts.map +0 -1
- package/dist/server/utils/response.js +0 -70
- package/dist/server/utils/response.js.map +0 -1
- package/dist/server/utils/sse.d.ts +0 -23
- package/dist/server/utils/sse.d.ts.map +0 -1
- package/dist/server/utils/sse.js +0 -39
- package/dist/server/utils/sse.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -1,4729 +1,66 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
* WebPeel CLI
|
|
3
|
+
* WebPeel CLI — Entry point
|
|
4
|
+
*
|
|
5
|
+
* Registers all command groups and starts the Commander program.
|
|
6
|
+
* The heavy implementation lives in src/cli/commands/*.ts
|
|
4
7
|
*
|
|
5
8
|
* Usage:
|
|
6
9
|
* npx webpeel <url> - Fetch and convert to markdown
|
|
7
10
|
* npx webpeel <url> --json - Output as JSON
|
|
8
|
-
* npx webpeel <url> --html - Output raw HTML
|
|
9
11
|
* npx webpeel <url> --render - Force browser mode
|
|
10
|
-
* npx webpeel <url> --wait 5000 - Wait 5s for JS to load
|
|
11
12
|
* npx webpeel search "query" - DuckDuckGo search
|
|
12
|
-
* npx webpeel
|
|
13
|
-
* npx webpeel
|
|
13
|
+
* npx webpeel mcp - Start MCP server
|
|
14
|
+
* npx webpeel --help - Condensed help
|
|
15
|
+
* npx webpeel --help-all - Full option reference
|
|
14
16
|
*/
|
|
15
17
|
import { Command } from 'commander';
|
|
16
|
-
import
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
import {
|
|
20
|
-
import {
|
|
21
|
-
import {
|
|
22
|
-
import {
|
|
23
|
-
|
|
24
|
-
import { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates } from './core/schema-templates.js';
|
|
25
|
-
// Intercept verb-first syntax before Commander parses
|
|
18
|
+
import { VERB_ALIASES, cliVersion, checkForUpdates, buildCommanderHelp, buildCondensedHelp, } from './cli/utils.js';
|
|
19
|
+
import { registerFetchCommands } from './cli/commands/fetch.js';
|
|
20
|
+
import { registerSearchCommands } from './cli/commands/search.js';
|
|
21
|
+
import { registerInteractCommands } from './cli/commands/interact.js';
|
|
22
|
+
import { registerAuthCommands } from './cli/commands/auth.js';
|
|
23
|
+
import { registerScreenshotCommands } from './cli/commands/screenshot.js';
|
|
24
|
+
import { registerJobsCommands } from './cli/commands/jobs.js';
|
|
25
|
+
// ── Verb alias intercept (before Commander parses) ────────────────────────────
|
|
26
26
|
// "webpeel fetch <url>" → "webpeel <url>"
|
|
27
|
-
// Note: 'read' is intentionally excluded — it's a registered subcommand
|
|
28
|
-
const VERB_ALIASES = new Set(['fetch', 'get', 'scrape', 'peel']);
|
|
27
|
+
// Note: 'read' is intentionally excluded — it's a registered subcommand.
|
|
29
28
|
if (process.argv.length >= 3 && VERB_ALIASES.has(process.argv[2]?.toLowerCase())) {
|
|
30
|
-
// Remove the verb, shift URL to its position
|
|
31
29
|
process.argv.splice(2, 1);
|
|
32
30
|
}
|
|
33
|
-
|
|
34
|
-
// Read version from package.json dynamically
|
|
35
|
-
import { fileURLToPath } from 'url';
|
|
36
|
-
import { dirname, resolve } from 'path';
|
|
37
|
-
let cliVersion = '0.0.0';
|
|
38
|
-
try {
|
|
39
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
40
|
-
const pkgPath = resolve(__dirname, '..', 'package.json');
|
|
41
|
-
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
|
|
42
|
-
cliVersion = pkg.version;
|
|
43
|
-
}
|
|
44
|
-
catch { /* fallback */ }
|
|
45
|
-
program
|
|
46
|
-
.name('webpeel')
|
|
47
|
-
.description('Fast web fetcher for AI agents')
|
|
48
|
-
.version(cliVersion)
|
|
49
|
-
.enablePositionalOptions();
|
|
50
|
-
// Check for updates (non-blocking, runs in background)
|
|
51
|
-
async function checkForUpdates() {
|
|
52
|
-
try {
|
|
53
|
-
const res = await fetch('https://registry.npmjs.org/webpeel/latest', {
|
|
54
|
-
signal: AbortSignal.timeout(2000),
|
|
55
|
-
});
|
|
56
|
-
if (!res.ok)
|
|
57
|
-
return;
|
|
58
|
-
const data = await res.json();
|
|
59
|
-
const latest = data.version;
|
|
60
|
-
if (latest && latest !== cliVersion && cliVersion !== '0.0.0') {
|
|
61
|
-
console.error(`\n💡 WebPeel v${latest} available (you have v${cliVersion}). Update: npm i -g webpeel@latest\n`);
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
catch { /* silently ignore — don't slow down the user */ }
|
|
65
|
-
}
|
|
66
|
-
// Fire and forget — don't await, don't block
|
|
67
|
-
void checkForUpdates();
|
|
68
|
-
/**
|
|
69
|
-
* Parse action strings into PageAction array
|
|
70
|
-
* Formats:
|
|
71
|
-
* click:.selector — click an element
|
|
72
|
-
* type:.selector=text — type text into an input
|
|
73
|
-
* fill:.selector=text — fill an input (replaces existing value)
|
|
74
|
-
* scroll:down:500 — scroll direction + amount
|
|
75
|
-
* scroll:bottom — scroll to bottom (legacy)
|
|
76
|
-
* scroll:top — scroll to top (legacy)
|
|
77
|
-
* wait:2000 — wait N ms
|
|
78
|
-
* press:Enter — press a keyboard key
|
|
79
|
-
* hover:.selector — hover over an element
|
|
80
|
-
* waitFor:.selector — wait for a selector to appear
|
|
81
|
-
* select:.selector=value — select dropdown option
|
|
82
|
-
* screenshot — take a screenshot
|
|
83
|
-
*/
|
|
84
|
-
function parseActions(actionStrings) {
|
|
85
|
-
return actionStrings.map(str => {
|
|
86
|
-
const [type, ...rest] = str.split(':');
|
|
87
|
-
const value = rest.join(':');
|
|
88
|
-
switch (type) {
|
|
89
|
-
case 'wait':
|
|
90
|
-
return { type: 'wait', ms: parseInt(value) || 1000 };
|
|
91
|
-
case 'click':
|
|
92
|
-
return { type: 'click', selector: value };
|
|
93
|
-
case 'scroll': {
|
|
94
|
-
// scroll:down:500 or scroll:bottom or scroll:500 or scroll:0,1500
|
|
95
|
-
const parts = value.split(':');
|
|
96
|
-
const dir = parts[0];
|
|
97
|
-
// Handle scroll:x,y format (e.g., scroll:0,1500)
|
|
98
|
-
if (dir && dir.includes(',')) {
|
|
99
|
-
const [x, y] = dir.split(',').map(Number);
|
|
100
|
-
if (!isNaN(x) && !isNaN(y)) {
|
|
101
|
-
return { type: 'scroll', to: { x, y } };
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
if (dir === 'top' || dir === 'bottom') {
|
|
105
|
-
return { type: 'scroll', to: dir };
|
|
106
|
-
}
|
|
107
|
-
if (dir === 'down' || dir === 'up' || dir === 'left' || dir === 'right') {
|
|
108
|
-
const amount = parseInt(parts[1] || '500', 10);
|
|
109
|
-
return { type: 'scroll', direction: dir, amount };
|
|
110
|
-
}
|
|
111
|
-
// Bare number: absolute position
|
|
112
|
-
const num = parseInt(dir, 10);
|
|
113
|
-
if (!isNaN(num)) {
|
|
114
|
-
return { type: 'scroll', to: num };
|
|
115
|
-
}
|
|
116
|
-
// Default: scroll to bottom
|
|
117
|
-
return { type: 'scroll', to: 'bottom' };
|
|
118
|
-
}
|
|
119
|
-
case 'type': {
|
|
120
|
-
const [sel, ...text] = value.split('=');
|
|
121
|
-
return { type: 'type', selector: sel, value: text.join('=') };
|
|
122
|
-
}
|
|
123
|
-
case 'fill': {
|
|
124
|
-
const [sel, ...text] = value.split('=');
|
|
125
|
-
return { type: 'fill', selector: sel, value: text.join('=') };
|
|
126
|
-
}
|
|
127
|
-
case 'select': {
|
|
128
|
-
const [sel, ...vals] = value.split('=');
|
|
129
|
-
return { type: 'select', selector: sel, value: vals.join('=') };
|
|
130
|
-
}
|
|
131
|
-
case 'press':
|
|
132
|
-
return { type: 'press', key: value };
|
|
133
|
-
case 'hover':
|
|
134
|
-
return { type: 'hover', selector: value };
|
|
135
|
-
case 'waitFor':
|
|
136
|
-
return { type: 'waitForSelector', selector: value };
|
|
137
|
-
case 'wait-for':
|
|
138
|
-
return { type: 'waitForSelector', selector: value, timeout: 10000 };
|
|
139
|
-
case 'screenshot':
|
|
140
|
-
return { type: 'screenshot' };
|
|
141
|
-
default:
|
|
142
|
-
throw new Error(`Unknown action type: ${type}`);
|
|
143
|
-
}
|
|
144
|
-
});
|
|
145
|
-
}
|
|
146
|
-
/**
 * Format an error for terminal output and append actionable suggestions
 * chosen from the error message text.
 *
 * The first line is always the error message in red. At most one category of
 * hints (connection, timeout, blocking, empty content, CAPTCHA, rate limit,
 * DNS, TLS, quota) is appended in yellow; categories are tested in a fixed
 * order and the first match wins.
 *
 * @param {unknown} error - The thrown value. Its `message` is used when
 *   present; otherwise the value is stringified. `null`/`undefined` are
 *   tolerated so the error reporter itself can never throw.
 * @param {string} _url - Unused; kept for call-site compatibility.
 * @param {{render?: boolean, stealth?: boolean}} [options] - CLI flags. Only
 *   `render` and `stealth` are read, to avoid suggesting a flag that is
 *   already enabled. May be omitted.
 * @returns {string} Newline-joined, ANSI-coloured message and hints.
 */
function formatError(error, _url, options) {
    // A missing options object must not crash error reporting.
    const flags = options ?? {};
    const msg = String(error?.message || error);
    const lower = msg.toLowerCase();
    const hasAny = (haystack, needles) => needles.some((needle) => haystack.includes(needle));
    const hint = (text) => `\x1b[33m💡 ${text}\x1b[0m`;
    const lines = [`\x1b[31m✖ ${msg}\x1b[0m`];
    if (hasAny(msg, ['net::ERR_', 'ECONNREFUSED', 'ENOTFOUND'])) {
        lines.push(hint('Check the URL is correct and the site is accessible.'));
    }
    else if (hasAny(msg, ['timeout', 'Timeout'])) {
        lines.push(hint('Try increasing timeout: --timeout 60000'));
        if (!flags.render) {
            lines.push(hint('Site may need browser rendering: --render'));
        }
    }
    else if (hasAny(msg, ['blocked', '403', 'Access Denied', 'challenge'])) {
        if (!flags.stealth) {
            lines.push(hint('Try stealth mode to bypass bot detection: --stealth'));
        }
        lines.push(hint('Try a different user agent: --ua "Mozilla/5.0..."'));
    }
    else if (hasAny(msg, ['empty', 'no content', '0 tokens'])) {
        if (!flags.render) {
            lines.push(hint('Page may be JavaScript-rendered. Try: --render'));
        }
        else if (!flags.stealth) {
            lines.push(hint('Content may be behind bot detection. Try: --stealth'));
        }
        lines.push(hint('Try waiting longer for content: --wait 5000'));
    }
    else if (hasAny(msg, ['captcha', 'CAPTCHA', 'Captcha'])) {
        lines.push(hint('This site requires CAPTCHA solving. Try a browser profile: --profile mysite --headed'));
    }
    else if (hasAny(msg, ['rate limit', '429'])) {
        lines.push(hint('Rate limited. Wait a moment and try again, or use --proxy.'));
    }
    else if (hasAny(lower, ['enotfound', 'getaddrinfo'])) {
        // Upper-case ENOTFOUND is already handled by the first branch; this
        // catches lower/mixed-case variants and bare getaddrinfo failures.
        lines.push(hint('Could not resolve hostname. Check the URL is correct.'));
    }
    else if (hasAny(lower, ['certificate', 'ssl', 'tls'])) {
        lines.push(hint('SSL/TLS error. The site may have an invalid certificate.'));
    }
    else if (hasAny(lower, ['usage', 'quota', 'limit'])) {
        lines.push(hint('Run `webpeel usage` to check your quota, or `webpeel login` to authenticate.'));
    }
    return lines.join('\n');
}
|
|
193
|
-
// Commander invokes option parsers as (value, previousValue). Passing the
// global parseInt directly would treat previousValue as the radix, so a
// repeated flag (e.g. `--wait 100 --wait 200`) would parse as NaN. All numeric
// options therefore go through this radix-10 wrapper.
const parseIntegerOption = (value) => parseInt(value, 10);
// Root command: positional URL plus every fetch-related flag.
program
    .argument('[url]', 'URL to fetch')
    .option('-r, --render', 'Use headless browser (for JS-heavy sites)')
    .option('--stealth', 'Use stealth mode to bypass bot detection (auto-enables --render)')
    .option('--cloaked', 'Use CloakBrowser stealth (requires: npm install cloakbrowser)')
    .option('--tls', 'Use PeelTLS TLS fingerprint spoofing (built-in, no install needed)')
    .option('--cycle', 'Use PeelTLS TLS fingerprint spoofing (alias for --tls)', false)
    .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
    .option('--proxies <urls>', 'Comma-separated list of proxy URLs for rotation (tried in order on failure)', (val) => val.split(',').map((s) => s.trim()).filter(Boolean))
    .option('-w, --wait <ms>', 'Wait time after page load (ms)', parseIntegerOption)
    .option('--html', 'Output raw HTML instead of markdown')
    .option('--text', 'Output plain text instead of markdown')
    .option('--clean', 'Clean output — article content only, no links or metadata (alias for --readable with URL-stripped markdown)')
    .option('--json', 'Output as JSON')
    .option('-t, --timeout <ms>', 'Request timeout (ms)', parseIntegerOption, 30000)
    .option('--ua <agent>', 'Custom user agent')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--screenshot [path]', 'Take a screenshot (optionally save to file path)')
    .option('--full-page', 'Full-page screenshot (use with --screenshot)')
    .option('--selector <css>', 'CSS selector to extract (e.g., "article", ".content")')
    .option('--exclude <selectors...>', 'CSS selectors to exclude (e.g., ".sidebar" ".ads")')
    .option('--include-tags <tags>', 'Comma-separated HTML tags/selectors to include (e.g., "main,article,.content")')
    .option('--exclude-tags <tags>', 'Comma-separated HTML tags/selectors to exclude (e.g., "nav,footer,aside")')
    .option('--only-main-content', 'Shortcut for --include-tags main,article')
    .option('--full-content', 'Return full page content (disable automatic content density pruning)')
    .option('--readable', 'Reader mode — extract only the main article content, strip all noise (like browser Reader Mode)')
    .option('--full-nav', 'Keep full navigation/content (disable auto-readability when piped or in agent mode)')
    .option('--focus <query>', 'Query-focused filtering — only return content relevant to this query (BM25 ranking)')
    .option('--chunk', 'Split content into RAG-ready chunks')
    .option('--chunk-size <tokens>', 'Max tokens per chunk (default: 512)', parseIntegerOption)
    .option('--chunk-overlap <tokens>', 'Overlap tokens between chunks (default: 50)', parseIntegerOption)
    .option('--chunk-strategy <strategy>', 'Chunking strategy: section (default), paragraph, fixed')
    .option('-H, --header <header...>', 'Custom headers (e.g., "Authorization: Bearer token")')
    .option('--cookie <cookie...>', 'Cookies to set (e.g., "session=abc123")')
    .option('--cache <ttl>', 'Cache results locally (e.g., "5m", "1h", "1d") — default: 5m')
    .option('--no-cache', 'Disable automatic caching for this request')
    .option('--links', 'Output only the links found on the page')
    .option('--images', 'Output image URLs from the page')
    .option('--meta', 'Output only the page metadata (title, description, author, etc.)')
    .option('--raw', 'Return full page without smart content extraction')
    .option('--full', 'Alias for --raw — full page content, no budget')
    .option('--lite', 'Lite mode — minimal processing, maximum speed (skip pruning, budget, metadata)')
    .option('--action <actions...>', 'Page actions before scraping (e.g., "click:.btn" "wait:2000" "scroll:bottom")')
    .option('--extract <json>', 'Extract structured data using CSS selectors (JSON object of field:selector pairs)')
    .option('--llm-extract [instruction]', 'Extract structured data using LLM (optional instruction, e.g. "extract hotel names and prices")')
    .option('--extract-schema <schema>', 'JSON schema for structured extraction (requires LLM key). Pass inline JSON or @file.json')
    .option('--llm-key <key>', 'LLM API key for AI features (or use OPENAI_API_KEY env var)')
    .option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
    .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
    .option('--summary', 'Generate AI summary of content (requires --llm-key or OPENAI_API_KEY)')
    .option('--location <country>', 'ISO country code for geo-targeting (e.g., "US", "DE", "JP")')
    .option('--language <lang>', 'Language preference (e.g., "en", "de", "ja")')
    .option('--max-tokens <n>', 'Maximum token count for output (truncate if exceeded)', parseIntegerOption)
    .option('--budget <n>', 'Smart token budget — distill content to fit within N tokens (heuristic, no LLM key needed)', parseIntegerOption)
    .option('--extract-all', 'Auto-detect and extract repeated listing items (e.g., search results)')
    .option('--schema <name>', 'Force a specific extraction schema by name or domain (e.g., "booking.com", "amazon")')
    .option('--list-schemas', 'List all available extraction schemas and their supported domains')
    .option('--scroll-extract [count]', 'Scroll page N times to load lazy content (bare flag = smart auto-scroll until stable), then extract (implies --render)', parseIntegerOption)
    .option('--scroll-extract-timeout <ms>', 'Total timeout in ms for auto-scroll (default: 30000, only used with bare --scroll-extract)', parseIntegerOption)
    .option('--csv', 'Output extraction results as CSV')
    .option('--table', 'Output extraction results as a formatted table')
    .option('--pages <n>', 'Follow pagination "Next" links for N pages (max 10)', parseIntegerOption)
    .option('--profile <path>', 'Use a persistent browser profile directory (cookies/sessions survive between calls)')
    .option('--headed', 'Run browser in headed (visible) mode — useful for profile setup and debugging')
    .option('-q, --question <q>', 'Ask a question about the page content (BM25-powered, no LLM key needed)')
    .option('--agent', 'Agent mode: sets --json, --silent, --extract-all, and --budget 4000 (override with --budget N)')
    .option('--device <type>', 'Device emulation: desktop (default), mobile, tablet (auto-enables --render)')
    .option('--viewport <WxH>', 'Browser viewport size (e.g., "1920x1080") (auto-enables --render)', (val) => {
        // "WxH" -> { width, height }; malformed input yields NaN fields.
        const [w, h] = val.split('x').map(Number);
        return { width: w, height: h };
    })
    .option('--wait-until <event>', 'Page load event: domcontentloaded, networkidle, load, commit (auto-enables --render)')
    .option('--wait-selector <css>', 'Wait for CSS selector before extracting (auto-enables --render)')
    .option('--block-resources <types>', 'Block resource types, comma-separated: image,stylesheet,font,media,script (auto-enables --render)')
    .option('--format <type>', 'Output format: markdown (default), text, html, json');
|
|
268
|
-
// ─── Help System ─────────────────────────────────────────────────────────────
|
|
269
|
-
// Detect --help-all early, before Commander parses argv.
|
|
31
|
+
// ── --help-all detection (must happen before Commander parses) ────────────────
|
|
270
32
|
// True when the user asked for the full option reference. Evaluated before
// Commander sees argv because Commander does not know a --help-all flag.
const isHelpAll = process.argv.slice(2).includes('--help-all');
if (isHelpAll) {
    // Rewrite the first --help-all into --help so Commander's normal help
    // path runs; the help formatter consults isHelpAll to pick the layout.
    const position = process.argv.indexOf('--help-all');
    if (position !== -1) {
        process.argv.splice(position, 1, '--help');
    }
}
|
|
277
|
-
//
|
|
278
|
-
const
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
*/
|
|
286
|
-
/**
 * Render Commander-style help text (usage, description, arguments, options,
 * subcommands) for a command, using the supplied Commander help helper for
 * terms, descriptions and wrapping.
 *
 * When `cmd` is the root command (`cmd.parent === null`) a static, grouped
 * quick-reference block (output formats, content control, rendering,
 * extraction, examples, agent integration) is appended.
 *
 * @param cmd - The command to describe.
 * @param helper - The Commander help helper passed to `formatHelp`.
 * @returns {string} The complete help text.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function buildCommanderHelp(cmd, helper) {
    const columnWidth = helper.padWidth(cmd, helper);
    const lineWidth = helper.helpWidth ?? 80;
    const indent = ' ';
    // One "term  description" row, wrapped so continuation lines align under
    // the description column; rows without a description are just the term.
    const renderRow = (term, description) => {
        if (!description) {
            return term;
        }
        const row = `${term.padEnd(columnWidth + 2)}${description}`;
        return helper.wrap(row, lineWidth - indent.length, columnWidth + 2);
    };
    // Indent every line of a section body.
    const renderBody = (rows) => rows.join('\n').replace(/^/gm, indent);
    const sections = [`Usage: ${helper.commandUsage(cmd)}`, ''];
    const summary = helper.commandDescription(cmd);
    if (summary.length > 0) {
        sections.push(helper.wrap(summary, lineWidth, 0), '');
    }
    // Each entry: section heading + lazy row collector, evaluated in order.
    const groups = [
        ['Arguments:', () => helper.visibleArguments(cmd).map((arg) => renderRow(helper.argumentTerm(arg), helper.argumentDescription(arg)))],
        ['Options:', () => helper.visibleOptions(cmd).map((opt) => renderRow(helper.optionTerm(opt), helper.optionDescription(opt)))],
        ['Commands:', () => helper.visibleCommands(cmd).map((sub) => renderRow(helper.subcommandTerm(sub), helper.subcommandDescription(sub)))],
    ];
    for (const [heading, collectRows] of groups) {
        const rows = collectRows();
        if (rows.length > 0) {
            sections.push(heading, renderBody(rows), '');
        }
    }
    // Grouped quick reference is only shown for the root command.
    if (cmd.parent === null) {
        sections.push(`
Output Formats:
--json JSON output with full metadata
--html Raw HTML output
--text Plain text output
--csv / --table Tabular output for extractions
-s, --silent No spinner or progress output

Content Control:
--readable Reader mode — clean article content only
--budget <n> Smart token budget (no LLM key needed)
--focus <query> BM25 query-focused filtering
--selector <css> Extract specific CSS selector
--only-main-content Just main/article content
--full-content Disable content pruning
-q, --question <q> Ask a question about the content

Rendering:
-r, --render Browser rendering for JS-heavy sites
--stealth Stealth mode for bot-protected sites
--profile <path> Persistent browser profile
--headed Visible browser (for debugging)
--action <actions> Browser automation (click, type, scroll...)

Extraction:
--extract <json> CSS selector extraction
--extract-all Auto-detect listing items
--schema <name> Named extraction schema
--llm-extract [inst] LLM-powered extraction (BYOK)

Examples:
$ webpeel "https://example.com" Basic fetch
$ webpeel "https://youtube.com/watch?v=..." --json YouTube transcript
$ webpeel "https://openai.com/pricing" -q "GPT-4 cost?" Quick answer
$ webpeel "https://nytimes.com/article" --readable Reader mode
$ webpeel search "best restaurants in NYC" Web search
$ webpeel hotels "Manhattan" --checkin tomorrow Hotel search

Agent Integration:
$ webpeel mcp Start MCP server
$ cat urls.txt | webpeel batch Batch from stdin
$ webpeel pipe "https://example.com" | jq .content Pipe-friendly JSON
$ webpeel "https://site.com" --json --silent Same as pipe
$ curl https://webpeel.dev/llms.txt AI-readable docs
`);
    }
    return sections.join('\n');
}
|
|
369
|
-
/**
 * Build the condensed help screen shown for the root command's default
 * `--help`: banner with version, usage, a few examples, the most common
 * commands and options, and pointers to per-command help, `--help-all`
 * and the online docs.
 *
 * Relies on the module-level `cliVersion` string and the `bold`, `dim` and
 * `cyan` text-styling helpers.
 *
 * @returns {string} The help text, newline-joined.
 */
function buildCondensedHelp() {
    const v = cliVersion;
    return [
        '',
        ` ${bold('◆ WebPeel')} ${dim(`v${v}`)}`,
        ` ${dim('The web data platform for AI agents')}`,
        '',
        ` ${bold('Usage:')} webpeel [url] [options]`,
        ` webpeel <command> [options]`,
        '',
        ` ${bold('Examples:')}`,
        ` webpeel https://example.com ${dim('Clean content (reader mode)')}`,
        ` webpeel read https://example.com ${dim('Explicit reader mode')}`,
        ` webpeel screenshot https://example.com ${dim('Screenshot any page')}`,
        ` webpeel ask https://news.com "summary" ${dim('Ask about any page')}`,
        ` webpeel search "webpeel vs jina" ${dim('Web search')}`,
        ` echo "url" | webpeel ${dim('Pipe mode (auto JSON)')}`,
        '',
        ` ${bold('Commands:')}`,
        ` fetch (default) Fetch a URL as clean markdown`,
        ` read <url> Reader mode (article content only)`,
        ` screenshot <url> Take a screenshot`,
        ` ask <url> <question> Ask about any page`,
        ` search <query> Search the web (DuckDuckGo + sources)`,
        ` crawl <url> Crawl a website`,
        ` mcp Start MCP server for AI tools`,
        ` ${dim('... (use --help-all for all 25+ commands)')}`,
        '',
        ` ${bold('Common Options:')}`,
        ` -r, --render Browser rendering (JS-heavy sites)`,
        ` --stealth Stealth mode (anti-bot bypass)`,
        ` --raw Full page (disable auto reader mode)`,
        ` --full Full page, no budget limit`,
        ` --json JSON output with metadata`,
        // Restored: this row was a truncated fragment ("--budget: 4000)")
        // with no flag name and an unbalanced parenthesis.
        ` --agent Agent mode (JSON + silent + budget: 4000)`,
        ` -q, --question <q> Ask about the content`,
        ` -s, --silent No spinner output`,
        '',
        ` Use ${cyan("'webpeel <command> --help'")} for command-specific options.`,
        ` Use ${cyan("'webpeel --help-all'")} for the full option reference.`,
        '',
        ` Docs: ${cyan('https://webpeel.dev/docs')}`,
        '',
    ].join('\n');
}
|
|
38
|
+
// ── Program setup ─────────────────────────────────────────────────────────────
|
|
39
|
+
// Root Commander instance for the `webpeel` CLI.
const program = new Command();
program.name('webpeel');
program.description('Fast web fetcher for AI agents');
program.version(cliVersion);
// Positional option parsing: options placed after a subcommand name are
// parsed by that subcommand rather than by the root command.
program.enablePositionalOptions();
|
|
45
|
+
// ── Help formatting ───────────────────────────────────────────────────────────
|
|
417
46
|
// Help layout: subcommands (and the root command under --help-all) get the
// full Commander-style listing; plain `webpeel --help` gets the condensed
// overview.
program.configureHelp({
    sortSubcommands: true,
    showGlobalOptions: false,
    formatHelp(cmd, helper) {
        const wantsFullListing = cmd.parent !== null || isHelpAll;
        return wantsFullListing
            ? buildCommanderHelp(cmd, helper)
            : buildCondensedHelp();
    },
});
|
|
431
|
-
//
|
|
432
|
-
|
|
433
|
-
//
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
params.set('render', 'true');
|
|
442
|
-
if (options.stealth)
|
|
443
|
-
params.set('stealth', 'true');
|
|
444
|
-
if (options.wait)
|
|
445
|
-
params.set('wait', String(options.wait));
|
|
446
|
-
if (options.selector)
|
|
447
|
-
params.set('selector', options.selector);
|
|
448
|
-
if (options.readable)
|
|
449
|
-
params.set('readable', 'true');
|
|
450
|
-
if (options.summary)
|
|
451
|
-
params.set('summary', 'true');
|
|
452
|
-
if (options.budget)
|
|
453
|
-
params.set('budget', String(options.budget));
|
|
454
|
-
if (options.question)
|
|
455
|
-
params.set('question', options.question);
|
|
456
|
-
const res = await fetch(`${apiUrl}/v1/fetch?${params}`, {
|
|
457
|
-
headers: { Authorization: `Bearer ${apiKey}` },
|
|
458
|
-
signal: AbortSignal.timeout(60000),
|
|
459
|
-
});
|
|
460
|
-
if (res.status === 401) {
|
|
461
|
-
throw Object.assign(new Error('API key invalid or expired. Run: webpeel auth <new-key>'), { code: 'AUTH_FAILED' });
|
|
462
|
-
}
|
|
463
|
-
if (res.status === 429) {
|
|
464
|
-
throw Object.assign(new Error('Rate limit exceeded. Check your plan at https://app.webpeel.dev/billing'), { code: 'RATE_LIMITED' });
|
|
465
|
-
}
|
|
466
|
-
if (!res.ok) {
|
|
467
|
-
const body = await res.text().catch(() => '');
|
|
468
|
-
throw new Error(`API error ${res.status}: ${body.slice(0, 200)}`);
|
|
469
|
-
}
|
|
470
|
-
const data = await res.json();
|
|
471
|
-
// Map API response to PeelResult shape that the CLI already handles
|
|
472
|
-
return {
|
|
473
|
-
url: data.url || url,
|
|
474
|
-
title: data.metadata?.title || data.title || '',
|
|
475
|
-
content: data.content || '',
|
|
476
|
-
method: data.method || 'simple',
|
|
477
|
-
tokens: data.tokenCount || data.tokens || 0,
|
|
478
|
-
elapsed: data.fetchTimeMs || data.elapsed || 0,
|
|
479
|
-
tokenSavingsPercent: data.tokenSavingsPercent,
|
|
480
|
-
rawTokenEstimate: data.rawTokenEstimate,
|
|
481
|
-
metadata: data.metadata || {},
|
|
482
|
-
links: data.links || [],
|
|
483
|
-
answer: data.answer,
|
|
484
|
-
summary: data.summary,
|
|
485
|
-
format: options.format || 'markdown',
|
|
486
|
-
};
|
|
487
|
-
}
|
|
488
|
-
// Main fetch handler — shared with the `pipe` subcommand
|
|
489
|
-
async function runFetch(url, options) {
|
|
490
|
-
// Handle --format flag: maps to existing boolean flags
|
|
491
|
-
if (options.format) {
|
|
492
|
-
const fmt = options.format.toLowerCase();
|
|
493
|
-
if (fmt === 'text')
|
|
494
|
-
options.text = true;
|
|
495
|
-
else if (fmt === 'html')
|
|
496
|
-
options.html = true;
|
|
497
|
-
else if (fmt === 'json')
|
|
498
|
-
options.json = true;
|
|
499
|
-
else if (fmt === 'markdown' || fmt === 'md') { /* default, do nothing */ }
|
|
500
|
-
else {
|
|
501
|
-
console.error(`Unknown format: ${options.format}. Use: text, markdown, html, or json`);
|
|
502
|
-
process.exit(1);
|
|
503
|
-
}
|
|
504
|
-
}
|
|
505
|
-
// Smart defaults: when piped (not a TTY), default to silent JSON + budget
|
|
506
|
-
// BUT respect explicit --format flag (user chose the output format)
|
|
507
|
-
const isPiped = !process.stdout.isTTY;
|
|
508
|
-
const hasExplicitFormat = options.format && ['text', 'html', 'markdown', 'md'].includes(options.format.toLowerCase());
|
|
509
|
-
if (isPiped && !options.html && !options.text && !hasExplicitFormat) {
|
|
510
|
-
if (!options.json)
|
|
511
|
-
options.json = true;
|
|
512
|
-
if (!options.silent)
|
|
513
|
-
options.silent = true;
|
|
514
|
-
// Auto-enable readability for AI consumers — clean content by default
|
|
515
|
-
if (!options.readable && !options.fullNav) {
|
|
516
|
-
options.readable = true;
|
|
517
|
-
}
|
|
518
|
-
// Auto token budget for piped mode (AI consumers want concise content)
|
|
519
|
-
if (options.budget === undefined && !options.fullContent && !options.raw && !options.full) {
|
|
520
|
-
options.budget = 4000;
|
|
521
|
-
}
|
|
522
|
-
}
|
|
523
|
-
// --full alias: sets raw + fullContent
|
|
524
|
-
if (options.full) {
|
|
525
|
-
options.raw = true;
|
|
526
|
-
options.fullContent = true;
|
|
527
|
-
}
|
|
528
|
-
// Smart defaults for terminal (interactive) mode
|
|
529
|
-
const isTerminal = process.stdout.isTTY && !isPiped;
|
|
530
|
-
if (isTerminal && !options.raw && !options.html && !options.text) {
|
|
531
|
-
// Auto-readable: clean content by default (like browser Reader Mode)
|
|
532
|
-
if (!options.readable && !options.fullNav && !options.selector) {
|
|
533
|
-
options.readable = true;
|
|
534
|
-
}
|
|
535
|
-
// Default token budget: don't flood the terminal with 20K tokens
|
|
536
|
-
if (options.budget === undefined && !options.fullContent && !options.raw) {
|
|
537
|
-
options.budget = 4000;
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
// --agent sets sensible defaults for AI agents; explicit flags override
|
|
541
|
-
if (options.agent) {
|
|
542
|
-
if (!options.json)
|
|
543
|
-
options.json = true;
|
|
544
|
-
if (!options.silent)
|
|
545
|
-
options.silent = true;
|
|
546
|
-
if (!options.extractAll)
|
|
547
|
-
options.extractAll = true;
|
|
548
|
-
if (options.budget === undefined)
|
|
549
|
-
options.budget = 4000;
|
|
550
|
-
// Agent mode = clean content by default
|
|
551
|
-
if (!options.readable && !options.fullNav) {
|
|
552
|
-
options.readable = true;
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
|
-
const isJson = options.json;
|
|
556
|
-
// --- --list-schemas: print all available schemas and exit ---
|
|
557
|
-
if (options.listSchemas) {
|
|
558
|
-
const { loadBundledSchemas } = await import('./core/schema-extraction.js');
|
|
559
|
-
const schemas = loadBundledSchemas();
|
|
560
|
-
if (isJson) {
|
|
561
|
-
await writeStdout(JSON.stringify(schemas.map(s => ({
|
|
562
|
-
name: s.name,
|
|
563
|
-
version: s.version,
|
|
564
|
-
domains: s.domains,
|
|
565
|
-
urlPatterns: s.urlPatterns,
|
|
566
|
-
})), null, 2) + '\n');
|
|
567
|
-
}
|
|
568
|
-
else {
|
|
569
|
-
console.log(`\nAvailable extraction schemas (${schemas.length}):\n`);
|
|
570
|
-
for (const s of schemas) {
|
|
571
|
-
console.log(` ${s.name} (v${s.version})`);
|
|
572
|
-
console.log(` Domains: ${s.domains.join(', ')}`);
|
|
573
|
-
if (s.urlPatterns && s.urlPatterns.length > 0) {
|
|
574
|
-
console.log(` URL patterns: ${s.urlPatterns.join(', ')}`);
|
|
575
|
-
}
|
|
576
|
-
console.log('');
|
|
577
|
-
}
|
|
578
|
-
}
|
|
579
|
-
process.exit(0);
|
|
580
|
-
}
|
|
581
|
-
// --- #5: Concise error for missing URL (no help dump) ---
|
|
582
|
-
if (!url || url.trim() === '') {
|
|
583
|
-
if (isJson) {
|
|
584
|
-
await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_request', message: 'URL is required' } }) + '\n');
|
|
585
|
-
}
|
|
586
|
-
else {
|
|
587
|
-
console.error('Error: URL is required');
|
|
588
|
-
console.error('Usage: webpeel <url> [options]');
|
|
589
|
-
console.error('Run "webpeel --help" for full usage.');
|
|
590
|
-
}
|
|
591
|
-
process.exit(1);
|
|
592
|
-
}
|
|
593
|
-
// --- #6: Helper to output JSON errors and exit ---
|
|
594
|
-
function exitWithJsonError(message, code) {
|
|
595
|
-
if (isJson) {
|
|
596
|
-
process.stdout.write(JSON.stringify({
|
|
597
|
-
success: false,
|
|
598
|
-
error: { type: code.toLowerCase(), message },
|
|
599
|
-
}) + '\n');
|
|
600
|
-
}
|
|
601
|
-
else {
|
|
602
|
-
console.error(`Error: ${message}`);
|
|
603
|
-
}
|
|
604
|
-
process.exit(1);
|
|
605
|
-
}
|
|
606
|
-
// SECURITY: Enhanced URL validation
|
|
607
|
-
if (url.length > 2048) {
|
|
608
|
-
exitWithJsonError('URL too long (max 2048 characters)', 'INVALID_URL');
|
|
609
|
-
}
|
|
610
|
-
// Check for control characters
|
|
611
|
-
if (/[\x00-\x1F\x7F]/.test(url)) {
|
|
612
|
-
exitWithJsonError('URL contains invalid control characters', 'INVALID_URL');
|
|
613
|
-
}
|
|
614
|
-
// Validate URL format
|
|
615
|
-
try {
|
|
616
|
-
const parsed = new URL(url);
|
|
617
|
-
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
|
618
|
-
exitWithJsonError('Only HTTP and HTTPS protocols are allowed', 'INVALID_URL');
|
|
619
|
-
}
|
|
620
|
-
}
|
|
621
|
-
catch {
|
|
622
|
-
// Check if it looks like a command/verb the user typed by mistake
|
|
623
|
-
const commonVerbs = ['fetch', 'get', 'scrape', 'read', 'download', 'curl', 'wget', 'peel'];
|
|
624
|
-
if (commonVerbs.includes(url.toLowerCase())) {
|
|
625
|
-
exitWithJsonError(`Did you mean: webpeel "${program.args[1] || '<url>'}"?\nThe URL goes directly after webpeel — no verb needed.\nExample: webpeel "https://example.com" --json`, 'INVALID_URL');
|
|
626
|
-
}
|
|
627
|
-
else {
|
|
628
|
-
exitWithJsonError(`Invalid URL: "${url}"\nMake sure to include the protocol (https://)\nExample: webpeel "https://${url}" --json`, 'INVALID_URL');
|
|
629
|
-
}
|
|
630
|
-
}
|
|
631
|
-
const useStealth = options.stealth || false;
|
|
632
|
-
// Check usage quota
|
|
633
|
-
const usageCheck = await checkUsage();
|
|
634
|
-
if (!usageCheck.allowed) {
|
|
635
|
-
if (isJson) {
|
|
636
|
-
await writeStdout(JSON.stringify({ success: false, error: { type: 'rate_limited', message: usageCheck.message } }) + '\n');
|
|
637
|
-
process.exit(1);
|
|
638
|
-
}
|
|
639
|
-
console.error(usageCheck.message);
|
|
640
|
-
process.exit(1);
|
|
641
|
-
}
|
|
642
|
-
// Check cache first (before spinner/network)
|
|
643
|
-
// Default: 5m TTL for all CLI fetches unless --no-cache is set
|
|
644
|
-
let cacheTtlMs;
|
|
645
|
-
const cacheDisabled = options.cache === false; // --no-cache sets options.cache to false
|
|
646
|
-
const explicitTtl = typeof options.cache === 'string' ? options.cache : undefined;
|
|
647
|
-
if (!cacheDisabled) {
|
|
648
|
-
const ttlStr = explicitTtl || '5m';
|
|
649
|
-
try {
|
|
650
|
-
cacheTtlMs = parseTTL(ttlStr);
|
|
651
|
-
}
|
|
652
|
-
catch (e) {
|
|
653
|
-
exitWithJsonError(e.message, 'FETCH_FAILED');
|
|
654
|
-
}
|
|
655
|
-
const cacheOptions = {
|
|
656
|
-
render: options.render,
|
|
657
|
-
stealth: options.stealth,
|
|
658
|
-
selector: options.selector,
|
|
659
|
-
format: options.html ? 'html' : options.text ? 'text' : options.clean ? 'clean' : 'markdown',
|
|
660
|
-
budget: null, // Budget excluded from cache key — cache stores full content
|
|
661
|
-
readable: options.readable || false,
|
|
662
|
-
};
|
|
663
|
-
const cachedResult = getCache(url, cacheOptions);
|
|
664
|
-
if (cachedResult) {
|
|
665
|
-
if (!options.silent) {
|
|
666
|
-
console.error(`\x1b[36m⚡ Cache hit\x1b[0m (TTL: ${ttlStr})`);
|
|
667
|
-
}
|
|
668
|
-
// Apply budget to cached content (cache stores full, budget is post-process)
|
|
669
|
-
if (options.budget && options.budget > 0 && cachedResult.content) {
|
|
670
|
-
const { distillToBudget } = await import('./core/budget.js');
|
|
671
|
-
const fmt = options.text ? 'text' : 'markdown';
|
|
672
|
-
cachedResult.content = distillToBudget(cachedResult.content, options.budget, fmt);
|
|
673
|
-
cachedResult.tokens = Math.ceil(cachedResult.content.length / 4);
|
|
674
|
-
}
|
|
675
|
-
// LLM extraction from cached content
|
|
676
|
-
if (options.llmExtract || options.extractSchema) {
|
|
677
|
-
const { extractWithLLM } = await import('./core/llm-extract.js');
|
|
678
|
-
const llmCfgCached = loadConfig();
|
|
679
|
-
const llmApiKeyCached = options.llmKey || llmCfgCached.llm?.apiKey || process.env.OPENAI_API_KEY;
|
|
680
|
-
if (!llmApiKeyCached) {
|
|
681
|
-
console.error('Error: LLM extraction requires an API key.\nSet OPENAI_API_KEY environment variable or use --llm-key <key>');
|
|
682
|
-
process.exit(1);
|
|
683
|
-
}
|
|
684
|
-
const llmModelCached = options.llmModel || llmCfgCached.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
|
|
685
|
-
const llmBaseUrlCached = options.llmBaseUrl || llmCfgCached.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
|
|
686
|
-
const llmInstructionCached = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
|
|
687
|
-
// Parse schema if provided
|
|
688
|
-
let llmSchemaCached;
|
|
689
|
-
if (options.extractSchema) {
|
|
690
|
-
let schemaStr = options.extractSchema;
|
|
691
|
-
if (schemaStr.startsWith('@')) {
|
|
692
|
-
schemaStr = readFileSync(schemaStr.slice(1), 'utf-8');
|
|
693
|
-
}
|
|
694
|
-
try {
|
|
695
|
-
llmSchemaCached = JSON.parse(schemaStr);
|
|
696
|
-
}
|
|
697
|
-
catch {
|
|
698
|
-
console.error('Error: --extract-schema must be valid JSON or a valid @file.json path');
|
|
699
|
-
process.exit(1);
|
|
700
|
-
}
|
|
701
|
-
}
|
|
702
|
-
const llmResultCached = await extractWithLLM({
|
|
703
|
-
content: cachedResult.content,
|
|
704
|
-
instruction: llmInstructionCached,
|
|
705
|
-
schema: llmSchemaCached,
|
|
706
|
-
apiKey: llmApiKeyCached,
|
|
707
|
-
model: llmModelCached,
|
|
708
|
-
baseUrl: llmBaseUrlCached,
|
|
709
|
-
});
|
|
710
|
-
await writeStdout(JSON.stringify(llmResultCached.items, null, 2) + '\n');
|
|
711
|
-
if (!options.silent) {
|
|
712
|
-
const { input, output } = llmResultCached.tokensUsed;
|
|
713
|
-
const costStr = llmResultCached.cost !== undefined ? ` | Est. cost: $${llmResultCached.cost.toFixed(6)}` : '';
|
|
714
|
-
console.error(`\n🤖 LLM extraction: ${llmResultCached.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResultCached.model}`);
|
|
715
|
-
}
|
|
716
|
-
process.exit(0);
|
|
717
|
-
}
|
|
718
|
-
// --- LLM-free Quick Answer (also on cached content) ---
|
|
719
|
-
if (options.question && cachedResult.content) {
|
|
720
|
-
const { quickAnswer } = await import('./core/quick-answer.js');
|
|
721
|
-
const qa = quickAnswer({
|
|
722
|
-
question: options.question,
|
|
723
|
-
content: cachedResult.content,
|
|
724
|
-
url: cachedResult.url,
|
|
725
|
-
});
|
|
726
|
-
cachedResult.quickAnswer = qa;
|
|
727
|
-
if (!isJson) {
|
|
728
|
-
const conf = (qa.confidence * 100).toFixed(0);
|
|
729
|
-
await writeStdout(`\n\x1b[36m📋 ${qa.question}\x1b[0m\n\n`);
|
|
730
|
-
if (qa.answer) {
|
|
731
|
-
await writeStdout(`\x1b[32m💡 Answer (${conf}% confidence):\x1b[0m\n${qa.answer}\n`);
|
|
732
|
-
}
|
|
733
|
-
else {
|
|
734
|
-
await writeStdout(`\x1b[33m💡 No relevant answer found (${conf}% confidence)\x1b[0m\n`);
|
|
735
|
-
}
|
|
736
|
-
if (qa.passages && qa.passages.length > 1) {
|
|
737
|
-
await writeStdout(`\n\x1b[33m📝 Supporting evidence:\x1b[0m\n`);
|
|
738
|
-
for (const p of qa.passages.slice(1, 4)) {
|
|
739
|
-
await writeStdout(` • [${(p.score * 100).toFixed(0)}%] ${p.text.substring(0, 200)}${p.text.length > 200 ? '...' : ''}\n`);
|
|
740
|
-
}
|
|
741
|
-
}
|
|
742
|
-
await writeStdout('\n');
|
|
743
|
-
await cleanup();
|
|
744
|
-
process.exit(0);
|
|
745
|
-
}
|
|
746
|
-
}
|
|
747
|
-
// --- BM25 Schema Template Extraction (cached path) ---
|
|
748
|
-
if (options.schema && cachedResult.content) {
|
|
749
|
-
const { getSchemaTemplate: getSchTmplCached } = await import('./core/schema-templates.js');
|
|
750
|
-
const schTemplateCached = getSchTmplCached(options.schema);
|
|
751
|
-
if (schTemplateCached) {
|
|
752
|
-
const { quickAnswer: qaCached } = await import('./core/quick-answer.js');
|
|
753
|
-
const { smartExtractSchemaFields: smartExtractCached } = await import('./core/schema-postprocess.js');
|
|
754
|
-
const extractedCached = smartExtractCached(cachedResult.content, schTemplateCached.fields, qaCached, {
|
|
755
|
-
pageTitle: cachedResult.title,
|
|
756
|
-
pageUrl: cachedResult.url,
|
|
757
|
-
metadata: cachedResult.metadata,
|
|
758
|
-
});
|
|
759
|
-
cachedResult.extracted = extractedCached;
|
|
760
|
-
}
|
|
761
|
-
}
|
|
762
|
-
await outputResult(cachedResult, options, { cached: true });
|
|
763
|
-
process.exit(0);
|
|
764
|
-
}
|
|
765
|
-
}
|
|
766
|
-
const spinner = options.silent ? null : ora('Fetching...').start();
|
|
767
|
-
try {
|
|
768
|
-
// Validate options
|
|
769
|
-
if (options.wait && (options.wait < 0 || options.wait > 60000)) {
|
|
770
|
-
throw Object.assign(new Error('Wait time must be between 0 and 60000ms'), { _code: 'FETCH_FAILED' });
|
|
771
|
-
}
|
|
772
|
-
// Parse custom headers
|
|
773
|
-
let headers;
|
|
774
|
-
if (options.header && options.header.length > 0) {
|
|
775
|
-
headers = {};
|
|
776
|
-
for (const header of options.header) {
|
|
777
|
-
const colonIndex = header.indexOf(':');
|
|
778
|
-
if (colonIndex === -1) {
|
|
779
|
-
throw Object.assign(new Error(`Invalid header format: ${header}. Expected "Key: Value"`), { _code: 'FETCH_FAILED' });
|
|
780
|
-
}
|
|
781
|
-
const key = header.slice(0, colonIndex).trim();
|
|
782
|
-
const value = header.slice(colonIndex + 1).trim();
|
|
783
|
-
headers[key] = value;
|
|
784
|
-
}
|
|
785
|
-
}
|
|
786
|
-
// Parse actions
|
|
787
|
-
let actions;
|
|
788
|
-
if (options.action && options.action.length > 0) {
|
|
789
|
-
try {
|
|
790
|
-
actions = parseActions(options.action);
|
|
791
|
-
}
|
|
792
|
-
catch (e) {
|
|
793
|
-
throw Object.assign(new Error(e.message), { _code: 'FETCH_FAILED' });
|
|
794
|
-
}
|
|
795
|
-
}
|
|
796
|
-
// --extract-schema auto-enables JSON output
|
|
797
|
-
if (options.extractSchema) {
|
|
798
|
-
options.json = true;
|
|
799
|
-
}
|
|
800
|
-
// Parse extract
|
|
801
|
-
let extract;
|
|
802
|
-
if (options.llmExtract || options.extractSchema) {
|
|
803
|
-
// LLM-based extraction is handled post-fetch (after peel returns markdown).
|
|
804
|
-
// Early-validate that an API key is available so we fail fast.
|
|
805
|
-
const llmCfg = loadConfig();
|
|
806
|
-
const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
|
|
807
|
-
if (!llmApiKey) {
|
|
808
|
-
throw Object.assign(new Error('LLM extraction requires an API key.\n' +
|
|
809
|
-
'Set OPENAI_API_KEY environment variable or use --llm-key <key>'), { _code: 'FETCH_FAILED' });
|
|
810
|
-
}
|
|
811
|
-
// Do NOT set extract here — peel runs normally, LLM extraction happens below.
|
|
812
|
-
}
|
|
813
|
-
else if (options.extract) {
|
|
814
|
-
// CSS-based extraction
|
|
815
|
-
try {
|
|
816
|
-
extract = { selectors: JSON.parse(options.extract) };
|
|
817
|
-
}
|
|
818
|
-
catch {
|
|
819
|
-
throw Object.assign(new Error('--extract must be valid JSON (e.g., \'{"title": "h1", "price": ".price"}\')'), { _code: 'FETCH_FAILED' });
|
|
820
|
-
}
|
|
821
|
-
}
|
|
822
|
-
// Validate maxTokens
|
|
823
|
-
if (options.maxTokens !== undefined) {
|
|
824
|
-
if (isNaN(options.maxTokens) || options.maxTokens < 100) {
|
|
825
|
-
throw Object.assign(new Error('--max-tokens must be at least 100'), { _code: 'FETCH_FAILED' });
|
|
826
|
-
}
|
|
827
|
-
}
|
|
828
|
-
// Parse include-tags and exclude-tags
|
|
829
|
-
let includeTags;
|
|
830
|
-
let excludeTags;
|
|
831
|
-
if (options.onlyMainContent) {
|
|
832
|
-
includeTags = ['main', 'article'];
|
|
833
|
-
}
|
|
834
|
-
else if (options.includeTags) {
|
|
835
|
-
includeTags = options.includeTags.split(',').map((t) => t.trim());
|
|
836
|
-
}
|
|
837
|
-
if (options.excludeTags) {
|
|
838
|
-
excludeTags = options.excludeTags.split(',').map((t) => t.trim());
|
|
839
|
-
}
|
|
840
|
-
// Build location options
|
|
841
|
-
let locationOptions;
|
|
842
|
-
if (options.location || options.language) {
|
|
843
|
-
locationOptions = {};
|
|
844
|
-
if (options.location) {
|
|
845
|
-
locationOptions.country = options.location;
|
|
846
|
-
}
|
|
847
|
-
if (options.language) {
|
|
848
|
-
locationOptions.languages = [options.language];
|
|
849
|
-
}
|
|
850
|
-
}
|
|
851
|
-
// ── Resolve --profile: name → path + storage state ─────────────────
|
|
852
|
-
let resolvedProfileDir;
|
|
853
|
-
let resolvedStorageState;
|
|
854
|
-
let resolvedProfileName;
|
|
855
|
-
if (options.profile) {
|
|
856
|
-
const profilePath = getProfilePath(options.profile);
|
|
857
|
-
if (profilePath) {
|
|
858
|
-
// It's a named profile in ~/.webpeel/profiles/
|
|
859
|
-
resolvedProfileDir = profilePath;
|
|
860
|
-
resolvedStorageState = loadStorageState(options.profile) ?? undefined;
|
|
861
|
-
resolvedProfileName = options.profile;
|
|
862
|
-
}
|
|
863
|
-
else if (existsSync(options.profile)) {
|
|
864
|
-
// It's a raw directory path (backward compat)
|
|
865
|
-
resolvedProfileDir = options.profile;
|
|
866
|
-
}
|
|
867
|
-
else {
|
|
868
|
-
exitWithJsonError(`Profile "${options.profile}" not found. Run "webpeel profile list" to see available profiles.`, 'PROFILE_NOT_FOUND');
|
|
869
|
-
}
|
|
870
|
-
}
|
|
871
|
-
// Build peel options
|
|
872
|
-
// --stealth auto-enables --render (stealth requires browser)
|
|
873
|
-
// --action auto-enables --render (actions require browser)
|
|
874
|
-
// --scroll-extract implies --render (needs browser)
|
|
875
|
-
//
|
|
876
|
-
// Bare --scroll-extract (no number) → smart autoScroll (detects stable height)
|
|
877
|
-
// --scroll-extract N (with number) → legacy fixed N scrolls via actions
|
|
878
|
-
const scrollExtractRaw = options.scrollExtract;
|
|
879
|
-
const isAutoScroll = scrollExtractRaw !== undefined && typeof scrollExtractRaw !== 'number';
|
|
880
|
-
const scrollExtractCount = isAutoScroll
|
|
881
|
-
? 0
|
|
882
|
-
: (scrollExtractRaw !== undefined ? scrollExtractRaw : 0);
|
|
883
|
-
const useRender = options.render || options.stealth || (actions && actions.length > 0) || scrollExtractCount > 0 || isAutoScroll
|
|
884
|
-
|| (options.device && options.device !== 'desktop')
|
|
885
|
-
|| !!options.viewport
|
|
886
|
-
|| !!options.waitUntil
|
|
887
|
-
|| !!options.waitSelector
|
|
888
|
-
|| !!options.blockResources
|
|
889
|
-
|| !!options.screenshot // Auto-enable render for screenshot (needs browser)
|
|
890
|
-
|| false;
|
|
891
|
-
// Inject scroll actions when --scroll-extract N (fixed count) is used
|
|
892
|
-
if (scrollExtractCount > 0) {
|
|
893
|
-
const scrollActions = [];
|
|
894
|
-
for (let i = 0; i < scrollExtractCount; i++) {
|
|
895
|
-
scrollActions.push({ type: 'scroll', to: 'bottom' });
|
|
896
|
-
scrollActions.push({ type: 'wait', ms: 1500 });
|
|
897
|
-
}
|
|
898
|
-
actions = actions ? [...actions, ...scrollActions] : scrollActions;
|
|
899
|
-
}
|
|
900
|
-
const peelOptions = {
|
|
901
|
-
render: useRender,
|
|
902
|
-
stealth: options.stealth || false,
|
|
903
|
-
wait: options.wait || 0,
|
|
904
|
-
timeout: options.timeout,
|
|
905
|
-
userAgent: options.ua,
|
|
906
|
-
screenshot: options.screenshot !== undefined,
|
|
907
|
-
screenshotFullPage: options.fullPage || false,
|
|
908
|
-
selector: options.selector,
|
|
909
|
-
exclude: options.exclude,
|
|
910
|
-
includeTags,
|
|
911
|
-
excludeTags,
|
|
912
|
-
headers,
|
|
913
|
-
cookies: options.cookie,
|
|
914
|
-
raw: options.raw || false,
|
|
915
|
-
lite: options.lite || false,
|
|
916
|
-
actions,
|
|
917
|
-
maxTokens: options.maxTokens,
|
|
918
|
-
// Note: budget is applied AFTER caching (so cache stores full content)
|
|
919
|
-
// We pass it to peel() for programmatic API compatibility, but the CLI
|
|
920
|
-
// also applies it post-fetch (see below) to ensure cache stores full result.
|
|
921
|
-
extract,
|
|
922
|
-
images: options.images || false,
|
|
923
|
-
location: locationOptions,
|
|
924
|
-
profileDir: resolvedProfileDir,
|
|
925
|
-
headed: options.headed || false,
|
|
926
|
-
storageState: resolvedStorageState,
|
|
927
|
-
proxy: options.proxy,
|
|
928
|
-
proxies: options.proxies,
|
|
929
|
-
fullPage: options.fullContent || false,
|
|
930
|
-
readable: options.readable || false,
|
|
931
|
-
// Smart auto-scroll (bare --scroll-extract flag)
|
|
932
|
-
autoScroll: isAutoScroll
|
|
933
|
-
? { timeout: options.scrollExtractTimeout }
|
|
934
|
-
: undefined,
|
|
935
|
-
device: options.device,
|
|
936
|
-
viewportWidth: options.viewport ? options.viewport.width : undefined,
|
|
937
|
-
viewportHeight: options.viewport ? options.viewport.height : undefined,
|
|
938
|
-
waitUntil: options.waitUntil,
|
|
939
|
-
waitSelector: options.waitSelector,
|
|
940
|
-
blockResources: options.blockResources ? options.blockResources.split(',').map((s) => s.trim()) : undefined,
|
|
941
|
-
cloaked: options.cloaked ? true : undefined,
|
|
942
|
-
cycle: options.cycle ? true : undefined,
|
|
943
|
-
tls: (options.tls || options.cycle) ? true : undefined,
|
|
944
|
-
};
|
|
945
|
-
if (options.cloaked) {
|
|
946
|
-
peelOptions.render = true; // CloakBrowser is a browser
|
|
947
|
-
}
|
|
948
|
-
// Add chunk option if requested
|
|
949
|
-
if (options.chunk) {
|
|
950
|
-
peelOptions.chunk = {
|
|
951
|
-
maxTokens: options.chunkSize || 512,
|
|
952
|
-
overlap: options.chunkOverlap || 50,
|
|
953
|
-
strategy: options.chunkStrategy || 'section',
|
|
954
|
-
};
|
|
955
|
-
}
|
|
956
|
-
// Add summary option if requested
|
|
957
|
-
if (options.summary) {
|
|
958
|
-
const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
|
|
959
|
-
if (!llmApiKey) {
|
|
960
|
-
throw Object.assign(new Error('--summary requires --llm-key or OPENAI_API_KEY environment variable'), { _code: 'FETCH_FAILED' });
|
|
961
|
-
}
|
|
962
|
-
peelOptions.summary = true;
|
|
963
|
-
peelOptions.llm = {
|
|
964
|
-
apiKey: llmApiKey,
|
|
965
|
-
model: process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini',
|
|
966
|
-
baseUrl: process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1',
|
|
967
|
-
};
|
|
968
|
-
}
|
|
969
|
-
// Determine format
|
|
970
|
-
if (options.html) {
|
|
971
|
-
peelOptions.format = 'html';
|
|
972
|
-
}
|
|
973
|
-
else if (options.text) {
|
|
974
|
-
peelOptions.format = 'text';
|
|
975
|
-
}
|
|
976
|
-
else if (options.clean) {
|
|
977
|
-
peelOptions.format = 'clean';
|
|
978
|
-
// --clean implies readable mode (article content only, no navs/footers)
|
|
979
|
-
peelOptions.readable = true;
|
|
980
|
-
}
|
|
981
|
-
else {
|
|
982
|
-
peelOptions.format = 'markdown';
|
|
983
|
-
}
|
|
984
|
-
// Fetch the page — route through API if key is configured, otherwise require auth
|
|
985
|
-
const fetchCfg = loadConfig();
|
|
986
|
-
const fetchApiKey = fetchCfg.apiKey || process.env.WEBPEEL_API_KEY;
|
|
987
|
-
const fetchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
|
|
988
|
-
let result;
|
|
989
|
-
if (fetchApiKey) {
|
|
990
|
-
// Use the WebPeel API — no local Playwright needed
|
|
991
|
-
result = await fetchViaApi(url, peelOptions, fetchApiKey, fetchApiUrl);
|
|
992
|
-
}
|
|
993
|
-
else {
|
|
994
|
-
// No API key — show helpful message instead of trying local mode
|
|
995
|
-
if (spinner)
|
|
996
|
-
spinner.fail('Authentication required');
|
|
997
|
-
console.error('No API key configured. Run: webpeel auth <your-key>');
|
|
998
|
-
console.error('Get a free key at: https://app.webpeel.dev/keys');
|
|
999
|
-
await cleanup();
|
|
1000
|
-
process.exit(2);
|
|
1001
|
-
}
|
|
1002
|
-
// Update lastUsed timestamp for named profiles
|
|
1003
|
-
if (resolvedProfileName) {
|
|
1004
|
-
touchProfile(resolvedProfileName);
|
|
1005
|
-
}
|
|
1006
|
-
if (spinner) {
|
|
1007
|
-
const domainTag = result.domainData
|
|
1008
|
-
? ` [${result.domainData.domain}:${result.domainData.type}]`
|
|
1009
|
-
: '';
|
|
1010
|
-
spinner.succeed(`Fetched in ${result.elapsed}ms using ${result.method} method${domainTag}`);
|
|
1011
|
-
}
|
|
1012
|
-
// Show metadata header
|
|
1013
|
-
const pageTitle = result.metadata?.title || result.title;
|
|
1014
|
-
if (!options.silent && !options.json && pageTitle) {
|
|
1015
|
-
const parts = [];
|
|
1016
|
-
if (result.metadata?.author)
|
|
1017
|
-
parts.push(`by ${result.metadata.author}`);
|
|
1018
|
-
if (result.readability?.readingTime)
|
|
1019
|
-
parts.push(result.readability.readingTime);
|
|
1020
|
-
if (result.tokens)
|
|
1021
|
-
parts.push(`${result.tokens.toLocaleString()} tokens`);
|
|
1022
|
-
const subtitle = parts.length ? ` · ${parts.join(' · ')}` : '';
|
|
1023
|
-
console.error(`\x1b[36m📄 ${pageTitle}${subtitle}\x1b[0m`);
|
|
1024
|
-
}
|
|
1025
|
-
// Show usage footer for free/anonymous users
|
|
1026
|
-
if (usageCheck.usageInfo && !options.silent) {
|
|
1027
|
-
showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, useStealth);
|
|
1028
|
-
}
|
|
1029
|
-
// Handle screenshot saving
|
|
1030
|
-
if (options.screenshot && result.screenshot) {
|
|
1031
|
-
const screenshotPath = typeof options.screenshot === 'string'
|
|
1032
|
-
? options.screenshot
|
|
1033
|
-
: 'screenshot.png';
|
|
1034
|
-
const screenshotBuffer = Buffer.from(result.screenshot, 'base64');
|
|
1035
|
-
writeFileSync(screenshotPath, screenshotBuffer);
|
|
1036
|
-
if (!options.silent) {
|
|
1037
|
-
console.error(`Screenshot saved to: ${screenshotPath}`);
|
|
1038
|
-
}
|
|
1039
|
-
// Remove screenshot from JSON output if saving to file
|
|
1040
|
-
if (typeof options.screenshot === 'string') {
|
|
1041
|
-
delete result.screenshot;
|
|
1042
|
-
}
|
|
1043
|
-
}
|
|
1044
|
-
// Store full result in cache (before budget distillation so cache is reusable)
|
|
1045
|
-
if (cacheTtlMs && !cacheDisabled) {
|
|
1046
|
-
setCache(url, result, cacheTtlMs, {
|
|
1047
|
-
render: options.render,
|
|
1048
|
-
stealth: useStealth,
|
|
1049
|
-
selector: options.selector,
|
|
1050
|
-
format: peelOptions.format,
|
|
1051
|
-
budget: null, // Budget excluded — cache stores full content, budget applied post-cache
|
|
1052
|
-
readable: options.readable || false,
|
|
1053
|
-
});
|
|
1054
|
-
}
|
|
1055
|
-
// Apply smart budget distillation AFTER caching (cache always stores full content)
|
|
1056
|
-
// When --agent is set, always apply budget even with --extract-all (listings will be budgeted
|
|
1057
|
-
// separately, but if no listings are found the content itself still needs trimming).
|
|
1058
|
-
const skipBudgetForExtract = (options.extractAll || options.scrollExtract !== undefined) && !options.agent;
|
|
1059
|
-
let contentTruncated = false;
|
|
1060
|
-
if (options.budget && options.budget > 0 && !skipBudgetForExtract) {
|
|
1061
|
-
const budgetFormat = peelOptions.format === 'text' ? 'text' : 'markdown';
|
|
1062
|
-
const distilled = distillToBudget(result.content, options.budget, budgetFormat);
|
|
1063
|
-
if (distilled !== result.content) {
|
|
1064
|
-
contentTruncated = true;
|
|
1065
|
-
result.content = distilled;
|
|
1066
|
-
result.tokens = estimateTokens(distilled);
|
|
1067
|
-
}
|
|
1068
|
-
}
|
|
1069
|
-
// --- BM25 Query-Focused Filtering ---
|
|
1070
|
-
if (options.focus && result.content) {
|
|
1071
|
-
const { filterByRelevance } = await import('./core/bm25-filter.js');
|
|
1072
|
-
const focusResult = filterByRelevance(result.content, { query: options.focus });
|
|
1073
|
-
result.content = focusResult.content;
|
|
1074
|
-
result.tokens = estimateTokens(focusResult.content);
|
|
1075
|
-
if (isJson) {
|
|
1076
|
-
result.focusQuery = options.focus;
|
|
1077
|
-
result.focusReduction = focusResult.reductionPercent;
|
|
1078
|
-
}
|
|
1079
|
-
}
|
|
1080
|
-
// --- LLM-free Quick Answer ---
|
|
1081
|
-
if (options.question && result.content) {
|
|
1082
|
-
const { quickAnswer } = await import('./core/quick-answer.js');
|
|
1083
|
-
const qa = quickAnswer({
|
|
1084
|
-
question: options.question,
|
|
1085
|
-
content: result.content,
|
|
1086
|
-
url: result.url,
|
|
1087
|
-
});
|
|
1088
|
-
result.quickAnswer = qa;
|
|
1089
|
-
if (!isJson) {
|
|
1090
|
-
// Display answer prominently in human-readable mode
|
|
1091
|
-
const conf = (qa.confidence * 100).toFixed(0);
|
|
1092
|
-
await writeStdout(`\n\x1b[36m📋 ${qa.question}\x1b[0m\n\n`);
|
|
1093
|
-
if (qa.answer) {
|
|
1094
|
-
await writeStdout(`\x1b[32m💡 Answer (${conf}% confidence):\x1b[0m\n${qa.answer}\n`);
|
|
1095
|
-
}
|
|
1096
|
-
else {
|
|
1097
|
-
await writeStdout(`\x1b[33m💡 No relevant answer found (${conf}% confidence)\x1b[0m\n`);
|
|
1098
|
-
}
|
|
1099
|
-
if (qa.passages && qa.passages.length > 1) {
|
|
1100
|
-
await writeStdout(`\n\x1b[33m📝 Supporting evidence:\x1b[0m\n`);
|
|
1101
|
-
for (const p of qa.passages.slice(1, 4)) {
|
|
1102
|
-
await writeStdout(` • [${(p.score * 100).toFixed(0)}%] ${p.text.substring(0, 200)}${p.text.length > 200 ? '...' : ''}\n`);
|
|
1103
|
-
}
|
|
1104
|
-
}
|
|
1105
|
-
await writeStdout('\n');
|
|
1106
|
-
await cleanup();
|
|
1107
|
-
process.exit(0);
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1110
|
-
// --- RAG Chunking output (chunks come from pipeline via peelOptions.chunk) ---
|
|
1111
|
-
if (result.chunks && result.chunks.length > 0 && !isJson) {
|
|
1112
|
-
console.log(`\n${'─'.repeat(60)}`);
|
|
1113
|
-
console.log(`📦 ${result.chunks.length} chunks (${options.chunkStrategy || 'section'} strategy)\n`);
|
|
1114
|
-
for (const chunk of result.chunks) {
|
|
1115
|
-
const sectionLabel = chunk.section ? ` [${chunk.section}]` : '';
|
|
1116
|
-
console.log(`── Chunk ${chunk.index + 1}${sectionLabel} (${chunk.tokenCount} tokens, ${chunk.wordCount} words) ──`);
|
|
1117
|
-
console.log(chunk.text.substring(0, 200) + (chunk.text.length > 200 ? '...' : ''));
|
|
1118
|
-
console.log('');
|
|
1119
|
-
}
|
|
1120
|
-
}
|
|
1121
|
-
// --- #4: Content quality warning ---
|
|
1122
|
-
const isHtmlContent = result.contentType ? result.contentType.toLowerCase().includes('html') : true;
|
|
1123
|
-
const isRedirect = false; // peel() follows redirects — final result is always 200
|
|
1124
|
-
if (result.tokens < 20 && !useRender && isHtmlContent && !isRedirect) {
|
|
1125
|
-
const warningMsg = `Low content detected (${result.tokens} tokens). Try: webpeel ${url} --render`;
|
|
1126
|
-
if (isJson) {
|
|
1127
|
-
result.warning = warningMsg;
|
|
1128
|
-
}
|
|
1129
|
-
else {
|
|
1130
|
-
console.error(`⚠ ${warningMsg}`);
|
|
1131
|
-
}
|
|
1132
|
-
}
|
|
1133
|
-
// --- LLM-based extraction (post-peel) ---
|
|
1134
|
-
if (options.llmExtract || options.extractSchema) {
|
|
1135
|
-
const { extractWithLLM } = await import('./core/llm-extract.js');
|
|
1136
|
-
const llmCfg = loadConfig();
|
|
1137
|
-
const llmApiKey = options.llmKey || llmCfg.llm?.apiKey || process.env.OPENAI_API_KEY;
|
|
1138
|
-
const llmModel = options.llmModel || llmCfg.llm?.model || process.env.WEBPEEL_LLM_MODEL || 'gpt-4o-mini';
|
|
1139
|
-
const llmBaseUrl = options.llmBaseUrl || llmCfg.llm?.baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1';
|
|
1140
|
-
const llmInstruction = typeof options.llmExtract === 'string' ? options.llmExtract : undefined;
|
|
1141
|
-
// Parse --extract-schema if provided
|
|
1142
|
-
let llmSchema;
|
|
1143
|
-
if (options.extractSchema) {
|
|
1144
|
-
let schemaStr = options.extractSchema;
|
|
1145
|
-
if (schemaStr.startsWith('@')) {
|
|
1146
|
-
schemaStr = readFileSync(schemaStr.slice(1), 'utf-8');
|
|
1147
|
-
}
|
|
1148
|
-
try {
|
|
1149
|
-
llmSchema = JSON.parse(schemaStr);
|
|
1150
|
-
}
|
|
1151
|
-
catch {
|
|
1152
|
-
exitWithJsonError('--extract-schema must be valid JSON or a valid @file.json path', 'FETCH_FAILED');
|
|
1153
|
-
}
|
|
1154
|
-
}
|
|
1155
|
-
const llmResult = await extractWithLLM({
|
|
1156
|
-
content: result.content,
|
|
1157
|
-
instruction: llmInstruction,
|
|
1158
|
-
schema: llmSchema,
|
|
1159
|
-
apiKey: llmApiKey,
|
|
1160
|
-
model: llmModel,
|
|
1161
|
-
baseUrl: llmBaseUrl,
|
|
1162
|
-
});
|
|
1163
|
-
// Output structured items as JSON
|
|
1164
|
-
await writeStdout(JSON.stringify(llmResult.items, null, 2) + '\n');
|
|
1165
|
-
// Show token usage and estimated cost
|
|
1166
|
-
if (!options.silent) {
|
|
1167
|
-
const { input, output } = llmResult.tokensUsed;
|
|
1168
|
-
const costStr = llmResult.cost !== undefined
|
|
1169
|
-
? ` | Est. cost: $${llmResult.cost.toFixed(6)}`
|
|
1170
|
-
: '';
|
|
1171
|
-
console.error(`\n🤖 LLM extraction: ${llmResult.items.length} items | ${input} input + ${output} output tokens${costStr} | model: ${llmResult.model}`);
|
|
1172
|
-
}
|
|
1173
|
-
await cleanup();
|
|
1174
|
-
process.exit(0);
|
|
1175
|
-
}
|
|
1176
|
-
// --- Extract-all / pagination / output formatting ---
|
|
1177
|
-
const wantsExtractAll = options.extractAll || options.scrollExtract !== undefined;
|
|
1178
|
-
const pagesCount = Math.min(Math.max(options.pages || 1, 1), 10);
|
|
1179
|
-
if (wantsExtractAll) {
|
|
1180
|
-
const { extractListings } = await import('./core/extract-listings.js');
|
|
1181
|
-
const { findNextPageUrl } = await import('./core/paginate.js');
|
|
1182
|
-
const { findSchemaForUrl, extractWithSchema, loadBundledSchemas } = await import('./core/schema-extraction.js');
|
|
1183
|
-
// Resolve which schema to use (explicit --schema flag or auto-detect)
|
|
1184
|
-
let activeSchema = null;
|
|
1185
|
-
if (options.schema) {
|
|
1186
|
-
// Find schema by name or domain match
|
|
1187
|
-
const schemaQuery = options.schema.toLowerCase();
|
|
1188
|
-
const allSchemas = loadBundledSchemas();
|
|
1189
|
-
activeSchema = allSchemas.find(s => s.name.toLowerCase().includes(schemaQuery) ||
|
|
1190
|
-
s.domains.some(d => d.toLowerCase().includes(schemaQuery))) ?? null;
|
|
1191
|
-
if (!activeSchema && !options.silent) {
|
|
1192
|
-
console.error(`Warning: No schema found for "${options.schema}", falling back to auto-detection`);
|
|
1193
|
-
}
|
|
1194
|
-
}
|
|
1195
|
-
else {
|
|
1196
|
-
// Auto-detect from URL
|
|
1197
|
-
activeSchema = findSchemaForUrl(result.url || url);
|
|
1198
|
-
}
|
|
1199
|
-
// We need the raw HTML for extraction. Re-fetch with format=html if needed.
|
|
1200
|
-
let allListings = [];
|
|
1201
|
-
// Fetch HTML for extraction
|
|
1202
|
-
const htmlResult = peelOptions.format === 'html'
|
|
1203
|
-
? result
|
|
1204
|
-
: await peel(url, { ...peelOptions, format: 'html', maxTokens: undefined });
|
|
1205
|
-
// Try schema extraction first, fall back to generic
|
|
1206
|
-
if (activeSchema) {
|
|
1207
|
-
const schemaListings = extractWithSchema(htmlResult.content, activeSchema, result.url);
|
|
1208
|
-
if (schemaListings.length > 0) {
|
|
1209
|
-
allListings.push(...schemaListings);
|
|
1210
|
-
}
|
|
1211
|
-
else {
|
|
1212
|
-
// Schema returned nothing — fall back to generic
|
|
1213
|
-
allListings.push(...extractListings(htmlResult.content, result.url));
|
|
1214
|
-
}
|
|
1215
|
-
}
|
|
1216
|
-
else {
|
|
1217
|
-
allListings.push(...extractListings(htmlResult.content, result.url));
|
|
1218
|
-
}
|
|
1219
|
-
// Pagination: follow "Next" links
|
|
1220
|
-
if (pagesCount > 1) {
|
|
1221
|
-
let currentHtml = htmlResult.content;
|
|
1222
|
-
let currentUrl = result.url;
|
|
1223
|
-
for (let page = 1; page < pagesCount; page++) {
|
|
1224
|
-
const nextUrl = findNextPageUrl(currentHtml, currentUrl);
|
|
1225
|
-
if (!nextUrl)
|
|
1226
|
-
break;
|
|
1227
|
-
try {
|
|
1228
|
-
const nextResult = await peel(nextUrl, { ...peelOptions, format: 'html', maxTokens: undefined });
|
|
1229
|
-
let pageListings;
|
|
1230
|
-
if (activeSchema) {
|
|
1231
|
-
const schemaPage = extractWithSchema(nextResult.content, activeSchema, nextResult.url);
|
|
1232
|
-
pageListings = schemaPage.length > 0
|
|
1233
|
-
? schemaPage
|
|
1234
|
-
: extractListings(nextResult.content, nextResult.url);
|
|
1235
|
-
}
|
|
1236
|
-
else {
|
|
1237
|
-
pageListings = extractListings(nextResult.content, nextResult.url);
|
|
1238
|
-
}
|
|
1239
|
-
allListings.push(...pageListings);
|
|
1240
|
-
currentHtml = nextResult.content;
|
|
1241
|
-
currentUrl = nextResult.url;
|
|
1242
|
-
}
|
|
1243
|
-
catch {
|
|
1244
|
-
break; // Stop paginating on error
|
|
1245
|
-
}
|
|
1246
|
-
}
|
|
1247
|
-
}
|
|
1248
|
-
// Apply budget to listings if requested
|
|
1249
|
-
let listingsTruncated = false;
|
|
1250
|
-
let totalAvailableListings;
|
|
1251
|
-
if (options.budget && options.budget > 0 && allListings.length > 0) {
|
|
1252
|
-
const { maxItems, truncated, totalAvailable } = budgetListings(allListings.length, options.budget);
|
|
1253
|
-
if (truncated) {
|
|
1254
|
-
listingsTruncated = true;
|
|
1255
|
-
totalAvailableListings = totalAvailable;
|
|
1256
|
-
allListings = allListings.slice(0, maxItems);
|
|
1257
|
-
}
|
|
1258
|
-
}
|
|
1259
|
-
// Output based on format flags
|
|
1260
|
-
if (options.csv) {
|
|
1261
|
-
const csvOutput = formatListingsCsv(allListings);
|
|
1262
|
-
await writeStdout(csvOutput);
|
|
1263
|
-
}
|
|
1264
|
-
else if (options.table) {
|
|
1265
|
-
const { formatTable } = await import('./core/table-format.js');
|
|
1266
|
-
const tableRows = allListings.map(item => {
|
|
1267
|
-
const row = {};
|
|
1268
|
-
for (const [k, v] of Object.entries(item)) {
|
|
1269
|
-
if (v !== undefined)
|
|
1270
|
-
row[k] = v;
|
|
1271
|
-
}
|
|
1272
|
-
return row;
|
|
1273
|
-
});
|
|
1274
|
-
await writeStdout(formatTable(tableRows) + '\n');
|
|
1275
|
-
}
|
|
1276
|
-
else if (isJson) {
|
|
1277
|
-
// Use unified envelope for JSON output
|
|
1278
|
-
const structured = allListings;
|
|
1279
|
-
const envelope = buildEnvelope(result, {
|
|
1280
|
-
cached: false,
|
|
1281
|
-
structured,
|
|
1282
|
-
truncated: listingsTruncated || undefined,
|
|
1283
|
-
totalAvailable: totalAvailableListings,
|
|
1284
|
-
});
|
|
1285
|
-
// Also include legacy fields for backward compat
|
|
1286
|
-
envelope.listings = allListings;
|
|
1287
|
-
envelope.count = allListings.length;
|
|
1288
|
-
await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
|
|
1289
|
-
}
|
|
1290
|
-
else {
|
|
1291
|
-
// Formatted text output
|
|
1292
|
-
if (allListings.length === 0) {
|
|
1293
|
-
await writeStdout('No listings found.\n');
|
|
1294
|
-
}
|
|
1295
|
-
else {
|
|
1296
|
-
const truncNote = listingsTruncated && totalAvailableListings
|
|
1297
|
-
? ` (${totalAvailableListings} total — budget limited to ${allListings.length})`
|
|
1298
|
-
: '';
|
|
1299
|
-
await writeStdout(`Found ${allListings.length} listings${truncNote}:\n\n`);
|
|
1300
|
-
allListings.forEach((item, i) => {
|
|
1301
|
-
const pricePart = item.price ? ` — ${item.price}` : '';
|
|
1302
|
-
const line = `${i + 1}. ${item.title}${pricePart}\n`;
|
|
1303
|
-
process.stdout.write(line);
|
|
1304
|
-
if (item.link) {
|
|
1305
|
-
process.stdout.write(` ${item.link}\n`);
|
|
1306
|
-
}
|
|
1307
|
-
process.stdout.write('\n');
|
|
1308
|
-
});
|
|
1309
|
-
}
|
|
1310
|
-
}
|
|
1311
|
-
}
|
|
1312
|
-
else if (options.csv || options.table) {
|
|
1313
|
-
// CSV / table output for --extract (CSS selector extraction)
|
|
1314
|
-
if (result.extracted) {
|
|
1315
|
-
const rows = normaliseExtractedToRows(result.extracted);
|
|
1316
|
-
if (options.csv) {
|
|
1317
|
-
await writeStdout(formatListingsCsv(rows));
|
|
1318
|
-
}
|
|
1319
|
-
else {
|
|
1320
|
-
const { formatTable } = await import('./core/table-format.js');
|
|
1321
|
-
await writeStdout(formatTable(rows) + '\n');
|
|
1322
|
-
}
|
|
1323
|
-
}
|
|
1324
|
-
else {
|
|
1325
|
-
console.error('--csv / --table require --extract-all or --extract to produce structured data.');
|
|
1326
|
-
}
|
|
1327
|
-
}
|
|
1328
|
-
else {
|
|
1329
|
-
// --- BM25 Schema Template Extraction (no LLM needed) ---
|
|
1330
|
-
if (options.schema && result.content) {
|
|
1331
|
-
const { getSchemaTemplate: getSchTmpl } = await import('./core/schema-templates.js');
|
|
1332
|
-
const schTemplate = getSchTmpl(options.schema);
|
|
1333
|
-
if (schTemplate) {
|
|
1334
|
-
const { quickAnswer: qa } = await import('./core/quick-answer.js');
|
|
1335
|
-
const { smartExtractSchemaFields } = await import('./core/schema-postprocess.js');
|
|
1336
|
-
const extracted = smartExtractSchemaFields(result.content, schTemplate.fields, qa, {
|
|
1337
|
-
pageTitle: result.title,
|
|
1338
|
-
pageUrl: result.url,
|
|
1339
|
-
metadata: result.metadata,
|
|
1340
|
-
});
|
|
1341
|
-
result.extracted = extracted;
|
|
1342
|
-
}
|
|
1343
|
-
}
|
|
1344
|
-
// Output results (default path)
|
|
1345
|
-
await outputResult(result, options, {
|
|
1346
|
-
cached: false,
|
|
1347
|
-
truncated: contentTruncated || undefined,
|
|
1348
|
-
});
|
|
1349
|
-
}
|
|
1350
|
-
// Clean up and exit
|
|
1351
|
-
await cleanup();
|
|
1352
|
-
process.exit(0);
|
|
1353
|
-
}
|
|
1354
|
-
catch (error) {
|
|
1355
|
-
if (spinner) {
|
|
1356
|
-
spinner.fail('Failed to fetch');
|
|
1357
|
-
}
|
|
1358
|
-
// --- #6: Consistent JSON error output ---
|
|
1359
|
-
if (isJson) {
|
|
1360
|
-
const errMsg = error instanceof Error ? error.message : 'Unknown error';
|
|
1361
|
-
const errCode = classifyErrorCode(error);
|
|
1362
|
-
await writeStdout(JSON.stringify({ success: false, error: { type: errCode.toLowerCase(), message: errMsg } }) + '\n');
|
|
1363
|
-
await cleanup();
|
|
1364
|
-
process.exit(1);
|
|
1365
|
-
}
|
|
1366
|
-
if (error instanceof Error) {
|
|
1367
|
-
console.error('\n' + formatError(error, url || '', options));
|
|
1368
|
-
}
|
|
1369
|
-
else {
|
|
1370
|
-
console.error('\x1b[31m✖ Unknown error occurred\x1b[0m');
|
|
1371
|
-
}
|
|
1372
|
-
await cleanup();
|
|
1373
|
-
process.exit(1);
|
|
1374
|
-
}
|
|
1375
|
-
}
|
|
1376
|
-
// Default action: when no subcommand is given, hand the URL and the parsed
// CLI options straight to runFetch.
program.action(async (targetUrl, cliOptions) => {
    await runFetch(targetUrl, cliOptions);
});
|
|
1380
|
-
// Read subcommand (explicit readable mode).
// Delegates to runFetch with `readable: true` and a token budget that
// defaults to 4000 when --budget is not supplied (or is not a number).
program
    .command('read <url>')
    .description('Read a page in clean reader mode (like browser Reader View)')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    // Wrap parseInt: Commander passes the previous option value as a second
    // argument, which a bare parseInt would interpret as the radix.
    .option('--budget <n>', 'Token budget (default: 4000)', (v) => parseInt(v, 10))
    .option('--focus <query>', 'Focus on content relevant to this query')
    .action(async (url, opts) => {
        // Honour an explicit --budget; previously a hard-coded 4000 placed
        // after the spread silently overwrote whatever the user passed.
        const budget = Number.isFinite(opts.budget) ? opts.budget : 4000;
        await runFetch(url, {
            ...opts,
            readable: true,
            budget,
        });
    });
|
|
1395
|
-
// Ask subcommand (question mode).
// Runs the regular fetch pipeline in readable mode with the question attached.
program
    .command('ask <url> <question>')
    .description('Ask a question about any page')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (url, question, opts) => {
        const fetchOptions = { ...opts, question, readable: true };
        await runFetch(url, fetchOptions);
    });
|
|
1408
|
-
// Search command.
// Two modes:
//   * --site <site>: builds a site-specific search URL, fetches its HTML via
//     peel(), extracts listings and prints them (CSV / table / JSON / text).
//   * default: calls the WebPeel search API (requires a configured API key)
//     and prints the results (URLs only / JSON / text).
// Exit codes used below: 0 success, 1 failure, 2 missing API key.
program
    .command('search <query>')
    .description('Search the web (DuckDuckGo by default, or use --site for site-specific search)')
    .option('-n, --count <n>', 'Number of results (1-10)', '5')
    .option('--top <n>', 'Limit results (alias for --count)')
    .option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
    .option('--search-api-key <key>', 'API key for the search provider (or env WEBPEEL_BRAVE_API_KEY)')
    .option('--site <site>', 'Search a specific site (e.g. ebay, amazon, github). Run "webpeel sites" for full list.')
    .option('--json', 'Output as JSON')
    .option('--urls-only', 'Output only URLs, one per line (pipe-friendly)')
    .option('--table', 'Output site-search results as a formatted table (requires --site)')
    .option('--csv', 'Output site-search results as CSV (requires --site)')
    // Wrap parseInt: Commander passes the previous option value as a second
    // argument, which a bare parseInt would interpret as the radix.
    .option('--budget <n>', 'Token budget for site-search result content', (v) => parseInt(v, 10))
    .option('-s, --silent', 'Silent mode')
    .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
    .option('--agent', 'Agent mode: sets --json, --silent, and --budget 4000 (override with --budget N)')
    .action(async (query, options) => {
        // --agent sets sensible defaults for AI agents; explicit flags override
        if (options.agent) {
            if (!options.json) {
                options.json = true;
            }
            if (!options.silent) {
                options.silent = true;
            }
            if (options.budget === undefined) {
                options.budget = 4000;
            }
        }
        const isJson = options.json;
        const isSilent = options.silent;
        // --top overrides --count when both are provided; unparseable input falls back to 5
        const count = parseInt(options.top ?? options.count, 10) || 5;
        // Copies each listing without its undefined-valued properties
        // (shared by the CSV and table formatters).
        const toRows = (items) => items.map((item) => {
            const row = {};
            for (const [key, value] of Object.entries(item)) {
                if (value !== undefined) {
                    row[key] = value;
                }
            }
            return row;
        });
        // Check usage quota
        const usageCheck = await checkUsage();
        if (!usageCheck.allowed) {
            console.error(usageCheck.message);
            process.exit(1);
        }
        // ── --site: site-specific structured search ───────────────────────────
        if (options.site) {
            const spinner = isSilent ? null : ora(`Searching ${options.site}...`).start();
            try {
                const { buildSiteSearchUrl } = await import('./core/site-search.js');
                const siteResult = buildSiteSearchUrl(options.site, query);
                // Fetch the raw HTML (needed for listing extraction)
                const htmlResult = await peel(siteResult.url, {
                    format: 'html',
                    timeout: 30000,
                    proxy: options.proxy,
                });
                if (spinner) {
                    spinner.succeed(`Fetched ${siteResult.site} in ${htmlResult.elapsed}ms`);
                }
                // Extract listings from the HTML
                const { extractListings } = await import('./core/extract-listings.js');
                let listings = extractListings(htmlResult.content, siteResult.url);
                // Apply budget if requested
                if (options.budget && options.budget > 0 && listings.length > 0) {
                    const { budgetListings } = await import('./core/budget.js');
                    const { maxItems } = budgetListings(listings.length, options.budget);
                    listings = listings.slice(0, maxItems);
                }
                // Show usage footer
                if (usageCheck.usageInfo && !isSilent) {
                    showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
                }
                // Output
                if (options.csv) {
                    await writeStdout(formatListingsCsv(toRows(listings)));
                }
                else if (options.table) {
                    const { formatTable } = await import('./core/table-format.js');
                    await writeStdout(formatTable(toRows(listings)) + '\n');
                }
                else if (isJson) {
                    const envelope = {
                        site: siteResult.site,
                        query: siteResult.query,
                        url: siteResult.url,
                        count: listings.length,
                        items: listings,
                        elapsed: htmlResult.elapsed,
                    };
                    await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
                }
                else if (listings.length === 0) {
                    await writeStdout('No listings found.\n');
                }
                else {
                    await writeStdout(`Found ${listings.length} listings on ${siteResult.site}:\n\n`);
                    for (const [i, item] of listings.entries()) {
                        const pricePart = item.price ? ` — ${item.price}` : '';
                        // Awaited writes (like every other output in this
                        // branch) so nothing is still buffered when
                        // process.exit() runs below.
                        await writeStdout(`${i + 1}. ${item.title}${pricePart}\n`);
                        if (item.link) {
                            await writeStdout(` ${item.link}\n`);
                        }
                        await writeStdout('\n');
                    }
                }
                await cleanup();
                process.exit(0);
            }
            catch (error) {
                if (spinner) {
                    spinner.fail('Site search failed');
                }
                if (error instanceof Error) {
                    console.error(`\nError: ${error.message}`);
                }
                else {
                    console.error('\nError: Unknown error occurred');
                }
                await cleanup();
                process.exit(1);
            }
        }
        const spinner = isSilent ? null : ora('Searching...').start();
        try {
            // Route search through the WebPeel API when a key is configured
            const searchCfg = loadConfig();
            const searchApiKey = searchCfg.apiKey || process.env.WEBPEEL_API_KEY;
            const searchApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
            if (!searchApiKey) {
                if (spinner) {
                    spinner.fail('Authentication required');
                }
                console.error('No API key configured. Run: webpeel auth <your-key>');
                console.error('Get a free key at: https://app.webpeel.dev/keys');
                process.exit(2);
            }
            const searchParams = new URLSearchParams({ q: query });
            // Clamp the requested count to the 1-10 range advertised in --count
            searchParams.set('limit', String(Math.min(Math.max(count, 1), 10)));
            if (options.budget) {
                searchParams.set('budget', String(options.budget));
            }
            const searchRes = await fetch(`${searchApiUrl}/v1/search?${searchParams}`, {
                headers: { Authorization: `Bearer ${searchApiKey}` },
                signal: AbortSignal.timeout(30000),
            });
            if (searchRes.status === 401) {
                if (spinner) {
                    spinner.fail('Authentication failed');
                }
                console.error('API key invalid or expired. Run: webpeel auth <new-key>');
                process.exit(1);
            }
            if (searchRes.status === 429) {
                if (spinner) {
                    spinner.fail('Rate limited');
                }
                console.error('Rate limit exceeded. Check your plan at https://app.webpeel.dev/billing');
                process.exit(1);
            }
            if (!searchRes.ok) {
                const body = await searchRes.text().catch(() => '');
                throw new Error(`Search API error ${searchRes.status}: ${body.slice(0, 200)}`);
            }
            const searchData = await searchRes.json();
            // API returns { success: true, data: { web: [...] } } or { results: [...] }
            const results = searchData.data?.web || searchData.data?.results || searchData.results || [];
            if (spinner) {
                spinner.succeed(`Found ${results.length} results`);
            }
            // Show usage footer for free/anonymous users
            if (usageCheck.usageInfo && !isSilent) {
                showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
            }
            if (options.urlsOnly) {
                // Pipe-friendly: one URL per line
                for (const result of results) {
                    await writeStdout(result.url + '\n');
                }
            }
            else if (isJson) {
                const jsonStr = JSON.stringify({ query, results, count: results.length }, null, 2);
                await writeStdout(jsonStr + '\n');
            }
            else {
                for (const result of results) {
                    console.log(`\n${result.title}`);
                    console.log(result.url);
                    console.log(result.snippet);
                }
            }
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Search failed');
            }
            if (error instanceof Error) {
                console.error(`\nError: ${error.message}`);
                const msg = error.message.toLowerCase();
                if (msg.includes('brave') && msg.includes('api key')) {
                    console.error('\n💡 Hint: Set your Brave API key: webpeel config set braveApiKey YOUR_KEY');
                    console.error(' Or use free DuckDuckGo search (default, no key needed).');
                }
                else if (msg.includes('timeout') || msg.includes('timed out')) {
                    console.error('\n💡 Hint: Search timed out. Try a more specific query or try again.');
                }
            }
            else {
                console.error('\nError: Unknown error occurred');
            }
            process.exit(1);
        }
    });
|
|
1626
|
-
// Sites command — list all supported site templates.
// With --json the (optionally category-filtered) list is printed as JSON;
// otherwise sites are grouped by category and printed as an indented list.
program
    .command('sites')
    .description('List all sites supported by "webpeel search --site <site>"')
    .option('--json', 'Output as JSON')
    .option('--category <cat>', 'Filter by category (shopping, social, tech, jobs, general, real-estate, food)')
    .action(async (options) => {
        const { listSites } = await import('./core/site-search.js');
        let sites = listSites();
        if (options.category) {
            sites = sites.filter((s) => s.category === options.category);
        }
        if (options.json) {
            await writeStdout(JSON.stringify(sites, null, 2) + '\n');
            process.exit(0);
        }
        // Group by category for pretty output
        const byCategory = new Map();
        for (const site of sites) {
            if (!byCategory.has(site.category)) {
                byCategory.set(site.category, []);
            }
            byCategory.get(site.category).push(site);
        }
        const categoryOrder = ['shopping', 'general', 'social', 'tech', 'jobs', 'real-estate', 'food'];
        const knownCategories = categoryOrder.filter((c) => byCategory.has(c));
        // Categories missing from the preferred order are appended (in
        // first-seen order) instead of being silently left out of the listing.
        const otherCategories = [...byCategory.keys()].filter((c) => !categoryOrder.includes(c));
        const sortedCategories = [...knownCategories, ...otherCategories];
        console.log('\nWebPeel Site-Aware Search — supported sites\n');
        console.log('Usage: webpeel search --site <id> "<query>"\n');
        for (const cat of sortedCategories) {
            const catSites = byCategory.get(cat);
            // String() guards against a site that carries no category at all
            const catName = String(cat);
            const label = catName.charAt(0).toUpperCase() + catName.slice(1);
            console.log(` ${label}:`);
            for (const s of catSites) {
                console.log(` ${s.id.padEnd(16)} ${s.name}`);
            }
            console.log('');
        }
        process.exit(0);
    });
|
|
1664
|
-
// Batch command.
// Reads URLs (one per line; blank lines and lines starting with '#' are
// skipped) from a file argument or from piped stdin, fetches them with
// peelBatch(), then emits the results as a JSON array, as one file per URL
// in --output, or as a short preview on stdout.
program
    .command('batch [file]')
    .description('Fetch multiple URLs from file or stdin pipe')
    .option('-c, --concurrency <n>', 'Max concurrent fetches (default: 3)', '3')
    .option('-o, --output <dir>', 'Output directory (one file per URL)')
    .option('--json', 'Output as JSON array')
    .option('-s, --silent', 'Silent mode')
    .option('-r, --render', 'Use headless browser')
    .option('--selector <css>', 'CSS selector to extract')
    .action(async (file, options) => {
        const isJson = options.json;
        const isSilent = options.silent;
        const shouldRender = options.render;
        const selector = options.selector;
        // Unparseable or zero concurrency falls back to 3
        const concurrency = parseInt(options.concurrency, 10) || 3;
        // Splits raw text into trimmed, non-empty, non-comment lines
        const parseUrlLines = (text) => text
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line && !line.startsWith('#'));
        // Check usage quota
        const usageCheck = await checkUsage();
        if (!usageCheck.allowed) {
            console.error(usageCheck.message);
            process.exit(1);
        }
        const spinner = isSilent ? null : ora('Loading URLs...').start();
        try {
            // Read URLs from file or stdin
            let urls;
            if (file) {
                // Read from file
                let content;
                try {
                    content = readFileSync(file, 'utf-8');
                }
                catch (error) {
                    // Keep the underlying fs error reachable via `cause`
                    throw new Error(`Failed to read file: ${file}`, { cause: error });
                }
                urls = parseUrlLines(content);
            }
            else if (!process.stdin.isTTY) {
                // Read from stdin pipe
                const chunks = [];
                for await (const chunk of process.stdin) {
                    chunks.push(chunk);
                }
                urls = parseUrlLines(Buffer.concat(chunks).toString('utf-8'));
            }
            else {
                throw new Error('Provide a file path or pipe URLs via stdin.\n Example: cat urls.txt | webpeel batch');
            }
            if (urls.length === 0) {
                throw new Error('No URLs found in file');
            }
            if (spinner) {
                spinner.text = `Fetching ${urls.length} URLs (concurrency: ${concurrency})...`;
            }
            // Batch fetch
            const results = await peelBatch(urls, {
                concurrency,
                render: shouldRender,
                selector: selector,
            });
            if (spinner) {
                // A result object counts as successful when it has a `content` property
                const successCount = results.filter((r) => 'content' in r).length;
                spinner.succeed(`Completed: ${successCount}/${urls.length} successful`);
            }
            // Show usage footer for free/anonymous users
            if (usageCheck.usageInfo && !isSilent) {
                showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
            }
            // Output results
            if (isJson) {
                const jsonStr = JSON.stringify(results, null, 2);
                // Wait for the write callback so the output is flushed before process.exit()
                await new Promise((resolve, reject) => {
                    process.stdout.write(jsonStr + '\n', (err) => {
                        if (err) {
                            reject(err);
                        }
                        else {
                            resolve();
                        }
                    });
                });
            }
            else if (options.output) {
                const { writeFileSync, mkdirSync } = await import('fs');
                const { join } = await import('path');
                // Create output directory
                mkdirSync(options.output, { recursive: true });
                results.forEach((result, i) => {
                    // Input lines are not validated as URLs, so a malformed
                    // line must not abort writing the remaining results:
                    // fall back to the (length-capped) raw line for the name.
                    let host;
                    try {
                        host = new URL(urls[i]).hostname;
                    }
                    catch {
                        host = String(urls[i]).slice(0, 100);
                    }
                    const filename = `${i + 1}_${host.replace(/[^a-z0-9]/gi, '_')}.md`;
                    const filepath = join(options.output, filename);
                    if ('content' in result) {
                        writeFileSync(filepath, result.content);
                    }
                    else {
                        writeFileSync(filepath, `Error: ${result.error}`);
                    }
                });
                if (!isSilent) {
                    console.log(`\nResults saved to: ${options.output}`);
                }
            }
            else {
                // Print results to stdout
                results.forEach((result, i) => {
                    console.log(`\n=== ${urls[i]} ===\n`);
                    if ('content' in result) {
                        // Only mark the preview as truncated when it really was
                        const ellipsis = result.content.length > 500 ? '...' : '';
                        console.log(result.content.slice(0, 500) + ellipsis);
                    }
                    else {
                        console.log(`Error: ${result.error}`);
                    }
                });
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Batch fetch failed');
            }
            if (error instanceof Error) {
                console.error(`\nError: ${error.message}`);
            }
            else {
                console.error('\nError: Unknown error occurred');
            }
            await cleanup();
            process.exit(1);
        }
    });
|
|
1796
|
-
// Crawl command.
// Checks the usage quota, runs crawl() from ./core/crawler.js with the parsed
// options, then prints either a JSON envelope or a per-page summary with a
// 500-character markdown preview.
program
    .command('crawl <url>')
    .description('Crawl a website starting from a URL')
    .option('--max-pages <number>', 'Maximum number of pages to crawl (default: 10, max: 100)', (v) => parseInt(v, 10), 10)
    .option('--max-depth <number>', 'Maximum depth to crawl (default: 2, max: 5)', (v) => parseInt(v, 10), 2)
    .option('--allowed-domains <domains...>', 'Only crawl these domains (default: same as starting URL)')
    .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
    .option('--ignore-robots', 'Ignore robots.txt (default: respect robots.txt)')
    .option('--rate-limit <ms>', 'Rate limit between requests in ms (default: 1000)', (v) => parseInt(v, 10), 1000)
    .option('-r, --render', 'Use headless browser for all pages')
    .option('--stealth', 'Use stealth mode for all pages')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .option('--resume', 'Resume an interrupted crawl from its last checkpoint')
    .action(async (url, options) => {
        // Refuse to start when the usage quota is exhausted
        const quota = await checkUsage();
        if (!quota.allowed) {
            console.error(quota.message);
            process.exit(1);
        }
        const { crawl } = await import('./core/crawler.js');
        const spinner = options.silent ? null : ora('Crawling...').start();
        try {
            const crawlOptions = {
                maxPages: options.maxPages,
                maxDepth: options.maxDepth,
                allowedDomains: options.allowedDomains,
                excludePatterns: options.exclude,
                respectRobotsTxt: !options.ignoreRobots,
                rateLimitMs: options.rateLimit,
                render: options.render || false,
                stealth: options.stealth || false,
                resume: options.resume || false,
            };
            const pages = await crawl(url, crawlOptions);
            spinner?.succeed(`Crawled ${pages.length} pages`);
            // Usage footer for free/anonymous users
            if (usageCheck_shouldShow(quota, options)) {
                showUsageFooter(quota.usageInfo, quota.isAnonymous || false, options.stealth || false);
            }
            if (options.json) {
                console.log(JSON.stringify({ pages: pages, count: pages.length }, null, 2));
            }
            else {
                pages.forEach((page, index) => {
                    console.log(`\n${'='.repeat(60)}`);
                    console.log(`[${index + 1}/${pages.length}] ${page.title}`);
                    console.log(`URL: ${page.url}`);
                    const origin = page.parent ? ` (from: ${page.parent})` : '';
                    console.log(`Depth: ${page.depth}${origin}`);
                    console.log(`Links found: ${page.links.length}`);
                    console.log(`Elapsed: ${page.elapsed}ms`);
                    if (page.error) {
                        console.log(`ERROR: ${page.error}`);
                        return;
                    }
                    // Preview only; the ellipsis marks content cut at 500 chars
                    const ellipsis = page.markdown.length > 500 ? '...' : '';
                    console.log(`\n${page.markdown.slice(0, 500)}${ellipsis}`);
                });
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            spinner?.fail('Crawl failed');
            const message = error instanceof Error
                ? `\nError: ${error.message}`
                : '\nError: Unknown error occurred';
            console.error(message);
            await cleanup();
            process.exit(1);
        }
        // Footer is shown only when usage info exists and output is not silenced
        function usageCheck_shouldShow(check, opts) {
            return Boolean(check.usageInfo && !opts.silent);
        }
    });
|
|
1874
|
-
// Map command.
// Calls mapDomain() from ./core/map.js and prints the discovered URLs, either
// as the raw JSON result or one URL per line on stdout with a summary on stderr.
program
    .command('map <url>')
    .description('Discover all URLs on a domain (sitemap + crawl)')
    .option('--no-sitemap', 'Skip sitemap.xml discovery')
    .option('--no-crawl', 'Skip homepage crawl')
    .option('--max <n>', 'Maximum URLs to discover (default: 5000)', (v) => parseInt(v, 10), 5000)
    .option('--include <patterns...>', 'Include only URLs matching these regex patterns')
    .option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (url, options) => {
        const { mapDomain } = await import('./core/map.js');
        const spinner = options.silent ? null : ora('Discovering URLs...').start();
        try {
            const mapOptions = {
                // Both stay enabled unless explicitly set to false
                useSitemap: options.sitemap !== false,
                crawlHomepage: options.crawl !== false,
                maxUrls: options.max,
                includePatterns: options.include,
                excludePatterns: options.exclude,
            };
            const discovery = await mapDomain(url, mapOptions);
            spinner?.succeed(`Found ${discovery.total} URLs in ${discovery.elapsed}ms`);
            if (options.json) {
                console.log(JSON.stringify(discovery, null, 2));
            }
            else {
                for (const discoveredUrl of discovery.urls) {
                    console.log(discoveredUrl);
                }
                // Summary goes to stderr so stdout stays a clean URL list
                if (!options.silent) {
                    console.error(`\nTotal: ${discovery.total} URLs`);
                    const usedSitemaps = discovery.sitemapUrls;
                    if (usedSitemaps.length > 0) {
                        console.error(`Sitemaps used: ${usedSitemaps.join(', ')}`);
                    }
                }
            }
            process.exit(0);
        }
        catch (error) {
            spinner?.fail('URL discovery failed');
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1920
|
-
// Watch command - monitor a URL for changes / assertion failures.
// Validates the URL, interval and assertions up front (exiting with code 1
// on the first problem), then hands control to watch() from ./core/watch.js.
program
    .command('watch <url>')
    .description('Monitor a URL for changes and assertion failures')
    .option('--interval <duration>', 'Check interval (e.g. 30s, 5m, 1h)', '5m')
    .option('--assert <condition...>', 'Assertion(s) to check (e.g. "status=200" "body.health=ok")')
    .option('--webhook <url>', 'POST this URL on assertion failure or content change')
    .option('-t, --timeout <ms>', 'Per-request timeout in ms', (v) => parseInt(v, 10), 10000)
    .option('--max-checks <n>', 'Stop after N checks (default: unlimited)', (v) => parseInt(v, 10))
    .option('--json', 'Output each check as NDJSON to stdout')
    .option('-s, --silent', 'Only output on failures/changes')
    .option('-r, --render', 'Use browser rendering for checks')
    .action(async (url, options) => {
        const { watch: runWatch, parseDuration, parseAssertion } = await import('./core/watch.js');
        // Only well-formed http(s) URLs are accepted
        try {
            const { protocol } = new URL(url);
            if (protocol !== 'http:' && protocol !== 'https:') {
                console.error('Error: Only HTTP and HTTPS protocols are allowed');
                process.exit(1);
            }
        }
        catch {
            console.error(`Error: Invalid URL format: ${url}`);
            process.exit(1);
        }
        // Convert the --interval string to milliseconds
        let intervalMs;
        try {
            intervalMs = parseDuration(options.interval);
        }
        catch (parseError) {
            console.error(`Error: ${parseError.message}`);
            process.exit(1);
        }
        // Parse every --assert expression; the first invalid one aborts
        const assertions = [];
        if (Array.isArray(options.assert)) {
            for (const expression of options.assert) {
                try {
                    assertions.push(parseAssertion(expression));
                }
                catch (parseError) {
                    console.error(`Error: ${parseError.message}`);
                    process.exit(1);
                }
            }
        }
        // Startup banner on stderr, suppressed by --json or --silent
        const showBanner = !options.json && !options.silent;
        if (showBanner) {
            let assertLabel = '';
            if (assertions.length > 0) {
                assertLabel = ` with ${assertions.length} assertion(s)`;
            }
            process.stderr.write(`Watching ${url} every ${options.interval}${assertLabel}. Press Ctrl+C to stop.\n`);
        }
        try {
            await runWatch({
                url,
                intervalMs,
                assertions,
                webhookUrl: options.webhook,
                timeout: options.timeout,
                maxChecks: options.maxChecks,
                render: options.render || false,
                json: options.json || false,
                silent: options.silent || false,
            });
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
        process.exit(0);
    });
|
|
1995
|
-
// Diff command - semantic diff against last snapshot.
// Validates the URL, runs diffUrl() from ./core/diff.js and prints the result
// as JSON (--json) or as a human-readable change list.
// NOTE(review): --last and --against are declared but never read in this
// action, so --against currently has no effect here — confirm whether
// diffUrl() is expected to receive it.
program
    .command('diff <url>')
    .description('Show semantic diff between current content and the last tracked snapshot')
    .option('--last', 'Compare against last tracked snapshot (default)')
    .option('--against <snapshot-url>', 'Compare against the snapshot stored for a different URL')
    .option('--fields <fields>', 'For JSON responses: only diff these fields (comma-separated dot-notation)')
    .option('--json', 'Output diff as JSON')
    .option('-r, --render', 'Use browser rendering')
    .option('-t, --timeout <ms>', 'Request timeout in ms', (v) => parseInt(v, 10), 30000)
    .option('-s, --silent', 'Silent mode (no spinner)')
    .action(async (url, options) => {
        const isJson = options.json;
        // Validate URL
        try {
            const parsed = new URL(url);
            if (!['http:', 'https:'].includes(parsed.protocol)) {
                if (isJson) {
                    await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_url', message: 'Only HTTP and HTTPS protocols are allowed' } }) + '\n');
                }
                else {
                    console.error('Error: Only HTTP and HTTPS protocols are allowed');
                }
                process.exit(1);
            }
        }
        catch {
            if (isJson) {
                await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_url', message: `Invalid URL format: ${url}` } }) + '\n');
            }
            else {
                console.error(`Error: Invalid URL format: ${url}`);
            }
            process.exit(1);
        }
        const spinner = options.silent ? null : ora('Fetching and diffing...').start();
        try {
            const { diffUrl } = await import('./core/diff.js');
            // "a, b.c" -> ['a', 'b.c']; undefined when --fields is not given
            const fields = options.fields
                ? options.fields.split(',').map((f) => f.trim()).filter(Boolean)
                : undefined;
            const result = await diffUrl(url, {
                render: options.render || false,
                timeout: options.timeout,
                fields,
            });
            if (spinner) {
                // Report the outcome; the previous wording ("completed in
                // CHANGED") read like a duration that was never filled in.
                spinner.succeed(`Diff completed: ${result.changed ? 'CHANGED' : 'no change'}`);
            }
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
            }
            else {
                // Human-readable output
                const ago = result.previousTimestamp
                    ? formatRelativeTime(new Date(result.previousTimestamp))
                    : 'unknown';
                console.log(`\nComparing ${result.url} (now vs ${ago})\n`);
                if (!result.changed) {
                    console.log(' No changes detected.');
                }
                else {
                    for (const change of result.changes) {
                        const label = change.field ?? change.path ?? '(unknown)';
                        if (change.type === 'modified') {
                            console.log(` Modified: ${label} ${change.before} → ${change.after}`);
                        }
                        else if (change.type === 'added') {
                            console.log(` Added: ${label} ${change.after}`);
                        }
                        else if (change.type === 'removed') {
                            console.log(` Removed: ${label} ${change.before}`);
                        }
                    }
                }
                console.log(`\nSummary: ${result.summary}`);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Diff failed');
            }
            const message = error instanceof Error ? error.message : 'Unknown error';
            if (isJson) {
                // `success: false` matches the validation-error envelopes
                // above; `error` and `code` keep their existing shape so
                // current consumers are unaffected.
                await writeStdout(JSON.stringify({
                    success: false,
                    error: message,
                    code: 'FETCH_FAILED',
                }) + '\n');
            }
            else {
                console.error(`Error: ${message}`);
            }
            await cleanup();
            process.exit(1);
        }
    });
|
|
2091
|
-
// auth command — set and verify API key in one step
//
// Usage: webpeel auth [key] [--json]
//   - With a key: the key is written to the config, then verified with a
//     request to `${apiUrl}/v1/usage`.
//   - Without a key: the key already stored in the config is re-verified.
// Exit codes: 0 when the key is verified; 2 when no key is available or the
// API answers 401. For any other HTTP status, or when the request itself
// fails, the key stays saved, a warning is printed and no explicit exit is
// requested.
program
    .command('auth [key]')
    .description('Set and verify your WebPeel API key')
    .option('--json', 'Output as JSON')
    .action(async (key, opts) => {
        const config = loadConfig();
        // Remember what was stored before anything is overwritten, so that a
        // rejected key can never wipe out a different key saved earlier.
        const previousKey = config.apiKey;
        // If no key provided, fall back to the stored one (or error if not set)
        const candidateKey = key || previousKey;
        if (!candidateKey) {
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: false, error: 'No API key set. Run: webpeel auth <key>' }));
            }
            else {
                console.error('No API key set. Run: webpeel auth <your-key>');
                console.error('Get a free key at: https://app.webpeel.dev/keys');
            }
            process.exit(2);
        }
        // Save the key first: it must survive an inconclusive verification
        // (server error, network failure) further down.
        config.apiKey = candidateKey;
        saveConfig(config);
        // Verify by calling the API
        const apiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
        try {
            const res = await fetch(`${apiUrl}/v1/usage`, {
                headers: { Authorization: `Bearer ${candidateKey}` },
                signal: AbortSignal.timeout(8000),
            });
            if (res.status === 401) {
                if (opts.json) {
                    console.log(JSON.stringify({ authenticated: false, error: 'Invalid API key' }));
                }
                else {
                    console.error('❌ Invalid API key. Get a valid key at: https://app.webpeel.dev/keys');
                }
                // Revert the key save. A different key that was stored before
                // this attempt is put back; when the rejected key *is* the
                // stored key, it is cleared.
                config.apiKey = previousKey === candidateKey ? undefined : previousKey;
                saveConfig(config);
                process.exit(2);
            }
            if (res.ok) {
                const data = await res.json();
                // The usage payload is probed under several field names.
                const plan = data.tier || (typeof data.plan === 'string' ? data.plan : data.plan?.tier) || 'free';
                const used = data.used ?? data.totalRequests ?? data.weekly?.used ?? 0;
                const limit = data.limit ?? data.weeklyLimit ?? data.weekly?.limit ?? 500;
                const remaining = limit - used;
                if (opts.json) {
                    console.log(JSON.stringify({
                        authenticated: true,
                        plan,
                        used,
                        limit,
                        remaining,
                        keyPrefix: candidateKey.slice(0, 12) + '...',
                    }));
                }
                else {
                    console.log(`✅ API key verified`);
                    console.log(` Plan: ${plan}`);
                    console.log(` Usage: ${used} / ${limit} this week (${remaining} remaining)`);
                    console.log(` Key: ${candidateKey.slice(0, 12)}...`);
                }
                process.exit(0);
            }
            // Non-200 non-401 — still save key but warn
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: 'unknown', warning: `API returned ${res.status}` }));
            }
            else {
                console.log(`⚠️ Key saved but couldn't verify (API returned ${res.status})`);
            }
        }
        catch (e) {
            // Anything can be thrown; only Error instances carry `.message`.
            const reason = e instanceof Error ? e.message : String(e);
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: 'unknown', warning: 'Network error', error: reason }));
            }
            else {
                console.log(`⚠️ Key saved but couldn't verify (network error: ${reason})`);
            }
        }
    });
|
|
2177
|
-
// status command — check auth status and API health
//
// Usage: webpeel status [--json]
// Queries `${apiUrl}/health` and `${apiUrl}/v1/usage` in parallel with the
// stored API key.
// Exit codes: 2 when no key is configured or the API answers 401; 1 when the
// usage endpoint cannot be reached or answers with any other non-OK status.
program
    .command('status')
    .description('Check authentication status and API usage')
    .option('--json', 'Output as JSON')
    .action(async (opts) => {
        const config = loadConfig();
        const key = config.apiKey;
        if (!key) {
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: false, error: 'No API key configured' }));
            }
            else {
                console.error('Not authenticated. Run: webpeel auth <your-key>');
                console.error('Get a free key at: https://app.webpeel.dev/keys');
            }
            process.exit(2);
        }
        const apiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
        try {
            const [healthRes, usageRes] = await Promise.all([
                // A failed health probe is reported as "offline" instead of
                // aborting the whole command.
                fetch(`${apiUrl}/health`, { signal: AbortSignal.timeout(5000) }).catch(() => null),
                fetch(`${apiUrl}/v1/usage`, {
                    headers: { Authorization: `Bearer ${key}` },
                    signal: AbortSignal.timeout(8000),
                }),
            ]);
            const apiOnline = healthRes?.ok ?? false;
            if (usageRes.status === 401) {
                if (opts.json) {
                    console.log(JSON.stringify({ authenticated: false, apiOnline, error: 'API key is invalid or expired' }));
                }
                else {
                    console.error('❌ API key is invalid. Run: webpeel auth <new-key>');
                }
                process.exit(2);
            }
            if (!usageRes.ok) {
                // Any other failure (5xx, 429, ...) tells us nothing about the
                // key, so do not claim "authenticated" with invented numbers.
                if (opts.json) {
                    console.log(JSON.stringify({ authenticated: 'unknown', apiOnline, warning: `API returned ${usageRes.status}` }));
                }
                else {
                    console.error(`⚠️ Could not verify API key (API returned ${usageRes.status})`);
                }
                process.exit(1);
            }
            const usage = await usageRes.json();
            // The usage payload is probed under several field names.
            const plan = usage?.tier || (typeof usage?.plan === 'string' ? usage?.plan : usage?.plan?.tier) || 'free';
            const used = usage?.used ?? usage?.totalRequests ?? usage?.weekly?.used ?? 0;
            const limit = usage?.limit ?? usage?.weeklyLimit ?? usage?.weekly?.limit ?? 500;
            const remaining = limit - used;
            if (opts.json) {
                console.log(JSON.stringify({
                    authenticated: true,
                    apiOnline,
                    plan,
                    used,
                    limit,
                    remaining,
                    keyPrefix: key.slice(0, 12) + '...',
                }));
            }
            else {
                console.log(`✅ Authenticated`);
                console.log(` API: ${apiOnline ? '🟢 online' : '🔴 offline'}`);
                console.log(` Plan: ${plan}`);
                console.log(` Usage: ${used} / ${limit} this week (${remaining} remaining)`);
                console.log(` Key: ${key.slice(0, 12)}...`);
            }
        }
        catch (e) {
            // Anything can be thrown; only Error instances carry `.message`.
            const reason = e instanceof Error ? e.message : String(e);
            if (opts.json) {
                console.log(JSON.stringify({ authenticated: 'unknown', error: reason }));
            }
            else {
                console.error(`❌ Could not reach API: ${reason}`);
            }
            process.exit(1);
        }
    });
|
|
2248
|
-
// doctor command — run a sequence of diagnostics and print one line per check
//
// Checks, in order: API health endpoint, API key validity (usage endpoint),
// a fetch of example.com, and a fetch of a YouTube URL. Every line printed
// with ❌ counts as a failed check; ⚠️ lines are warnings only. The closing
// message (and the process exit code) reflects whether any check failed.
program
    .command('doctor')
    .description('Diagnose WebPeel installation (API key, connectivity, fetch test)')
    .action(async () => {
        const cfg = loadConfig();
        const apiKey = cfg.apiKey || process.env.WEBPEEL_API_KEY;
        const apiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
        console.log('WebPeel Doctor\n');
        console.log(`Version: ${cliVersion}`);
        console.log(`API URL: ${apiUrl}`);
        console.log(`API Key: ${apiKey ? apiKey.slice(0, 12) + '...' : '❌ Not configured'}`);
        if (!apiKey) {
            console.log('\n❌ No API key. Run: webpeel auth <your-key>');
            console.log(' Get a free key at: https://app.webpeel.dev/keys');
            process.exit(1);
        }
        const authHeaders = { Authorization: `Bearer ${apiKey}` };
        // Anything can be thrown; only Error instances carry `.message`.
        const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
        let failedChecks = 0;
        // Check API connectivity
        console.log('\nChecking API connectivity...');
        try {
            const healthRes = await fetch(`${apiUrl}/health`, { signal: AbortSignal.timeout(10000) });
            if (healthRes.ok) {
                const health = await healthRes.json();
                console.log(`API Health: ✅ ${health.status || 'ok'} (uptime: ${Math.round((health.uptime || 0) / 60)}min)`);
            }
            else {
                // An error status with a JSON body must not be shown as healthy.
                failedChecks += 1;
                console.log(`API Health: ❌ Unhealthy response (${healthRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`API Health: ❌ Cannot reach ${apiUrl} (${reasonOf(err)})`);
        }
        // Check API key validity
        console.log('Checking API key...');
        try {
            const usageRes = await fetch(`${apiUrl}/v1/usage`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(10000),
            });
            if (usageRes.ok) {
                const usage = await usageRes.json();
                const plan = usage?.tier || (typeof usage?.plan === 'string' ? usage?.plan : usage?.plan?.tier) || 'free';
                const used = usage?.used ?? usage?.totalRequests ?? usage?.weekly?.used ?? 0;
                const limit = usage?.limit ?? usage?.weeklyLimit ?? usage?.weekly?.limit ?? 500;
                console.log(`API Key: ✅ Valid (${plan} plan, ${used}/${limit} used this week)`);
            }
            else if (usageRes.status === 401) {
                failedChecks += 1;
                console.log('API Key: ❌ Invalid or expired. Run: webpeel auth <new-key>');
            }
            else {
                console.log(`API Key: ⚠️ Unexpected response (${usageRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`API Key: ❌ Check failed (${reasonOf(err)})`);
        }
        // Quick fetch test
        console.log('Testing fetch...');
        try {
            // The target URL is percent-encoded, same as the YouTube check below.
            const testRes = await fetch(`${apiUrl}/v1/fetch?url=${encodeURIComponent('https://example.com')}`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (testRes.ok) {
                const data = await testRes.json();
                console.log(`Fetch Test: ✅ OK (${data.tokenCount || data.tokens || '?'} tokens, ${data.fetchTimeMs || data.elapsed || '?'}ms)`);
            }
            else {
                failedChecks += 1;
                console.log(`Fetch Test: ❌ Failed (${testRes.status})`);
            }
        }
        catch (err) {
            failedChecks += 1;
            console.log(`Fetch Test: ❌ Failed (${reasonOf(err)})`);
        }
        // Check YouTube (warning-only: never counted as a failed check)
        console.log('Testing YouTube...');
        try {
            const ytRes = await fetch(`${apiUrl}/v1/fetch?url=${encodeURIComponent('https://www.youtube.com/watch?v=dQw4w9WgXcQ')}`, {
                headers: authHeaders,
                signal: AbortSignal.timeout(15000),
            });
            if (ytRes.ok) {
                const data = await ytRes.json();
                const hasContent = (data.content || '').length > 100;
                console.log(`YouTube: ${hasContent ? '✅' : '⚠️'} ${hasContent ? `Content extracted (${data.tokenCount || data.tokens || '?'} tokens)` : 'Content limited'}`);
            }
            else {
                console.log(`YouTube: ⚠️ Response ${ytRes.status}`);
            }
        }
        catch (err) {
            console.log(`YouTube: ⚠️ ${reasonOf(err)}`);
        }
        if (failedChecks === 0) {
            console.log('\n✅ WebPeel is ready to use!');
            console.log(' Try: webpeel "https://news.ycombinator.com" --json');
        }
        else {
            // Do not announce "ready" after reporting failures above.
            console.log(`\n❌ ${failedChecks} check(s) failed. Fix the issues above and run: webpeel doctor`);
            process.exitCode = 1;
        }
    });
|
|
2338
|
-
// login command — delegates to handleLogin(); exits 0 on success, and prints
// the failure reason then exits 1 when handleLogin() throws or rejects.
program
    .command('login')
    .description('Authenticate the CLI with your API key')
    .action(async () => {
        let exitCode = 0;
        try {
            await handleLogin();
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            exitCode = 1;
        }
        process.exit(exitCode);
    });
|
|
2351
|
-
// whoami command — print the stored API key (masked), the plan tier when the
// config has one, and the config file location. Always exits 0 unless loading
// the config throws, in which case the error is printed and the exit code is 1.
program
    .command('whoami')
    .description('Show your current authentication status')
    .action(async () => {
        try {
            const { loadConfig } = await import('./cli-auth.js');
            const config = loadConfig();
            if (!config.apiKey) {
                console.log('Not logged in. Run `webpeel login` to authenticate.');
            }
            else {
                const keyText = String(config.apiKey);
                // Head (7) + tail (4) would reproduce the entire key when it
                // has 11 characters or fewer, so such keys are hidden fully.
                const masked = keyText.length > 11
                    ? `${keyText.slice(0, 7)}...${keyText.slice(-4)}`
                    : '****';
                console.log(`Logged in with API key: ${masked}`);
                if (config.planTier) {
                    const tierLabel = config.planTier.charAt(0).toUpperCase() + config.planTier.slice(1);
                    console.log(`Plan: ${tierLabel}`);
                }
                console.log(`Config: ~/.webpeel/config.json`);
            }
            process.exit(0);
        }
        catch (error) {
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
2377
|
-
// logout command — delegates to handleLogout() (synchronous); exits 0 on
// success, and prints the failure reason then exits 1 when it throws.
program
    .command('logout')
    .description('Clear your saved credentials')
    .action(() => {
        let exitCode = 0;
        try {
            handleLogout();
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            exitCode = 1;
        }
        process.exit(exitCode);
    });
|
|
2390
|
-
// usage command — delegates to handleUsage(); exits 0 on success, and prints
// the failure reason then exits 1 when handleUsage() throws or rejects.
program
    .command('usage')
    .description('Show your current usage and quota')
    .action(async () => {
        let exitCode = 0;
        try {
            await handleUsage();
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            exitCode = 1;
        }
        process.exit(exitCode);
    });
|
|
2403
|
-
// serve command — lazily loads the server module and starts it on the
// requested port (option default: '3000'). An unparseable or out-of-range
// port is rejected up front with exit code 1 instead of being handed to
// startServer() as NaN.
program
    .command('serve')
    .description('Start API server')
    .option('-p, --port <port>', 'Port number', '3000')
    .action(async (options) => {
        const port = Number.parseInt(options.port, 10);
        if (!Number.isInteger(port) || port < 0 || port > 65535) {
            console.error(`Error: invalid port "${options.port}" (expected an integer between 0 and 65535)`);
            process.exit(1);
        }
        // Imported on demand so other commands do not load the server module.
        const { startServer } = await import('./server/app.js');
        startServer({ port });
    });
|
|
2411
|
-
// mcp command — the action only imports './mcp/server.js'; presumably that
// module starts the MCP server as an import side effect (not visible here).
program
    .command('mcp')
    .description('Start MCP server for Claude Desktop / Cursor')
    .action(async function startMcp() {
        await import('./mcp/server.js');
    });
|
|
2417
|
-
// Pipe command — always JSON, no UI (agent-friendly)
//
// Forwards to runFetch() with `json` and `silent` forced on, whatever flags
// were given on the command line.
program
    .command('pipe <url>')
    .description('Pipe-friendly fetch (always JSON, no UI). Alias for: webpeel <url> --json --silent')
    .option('-r, --render', 'Use headless browser')
    .option('--stealth', 'Stealth mode')
    // Commander calls an option processor as fn(value, previousValue). Passing
    // bare `parseInt` would therefore use the previous value as the radix, so
    // the radix is pinned to 10 explicitly.
    .option('--budget <n>', 'Token budget', (value) => Number.parseInt(value, 10))
    .option('--clean', 'Clean format for AI')
    .option('-q, --question <q>', 'Quick answer')
    .option('--proxy <url>', 'Proxy URL')
    .option('--timeout <ms>', 'Timeout in ms', (value) => Number.parseInt(value, 10))
    .option('-s, --silent', 'Silent mode (always on for pipe, accepted for compatibility)')
    .action(async (url, opts) => {
        // Force JSON + silent — always, unconditionally. A copy is passed on
        // so the options object owned by Commander is left untouched.
        await runFetch(url, { ...opts, json: true, silent: true });
    });
|
|
2435
|
-
// Config command — webpeel config [get|set] [key] [value]
|
|
2436
|
-
program
|
|
2437
|
-
.command('config')
|
|
2438
|
-
.description('View or update CLI configuration')
|
|
2439
|
-
.argument('[action]', '"list", "get <key>", "set <key> <value>", or omit for overview')
|
|
2440
|
-
.argument('[key]', 'Config key')
|
|
2441
|
-
.argument('[value]', 'Value to set')
|
|
2442
|
-
.action(async (action, key, value) => {
|
|
2443
|
-
const config = loadConfig();
|
|
2444
|
-
// Settable config keys (safe for user modification)
|
|
2445
|
-
// Supports dot-notation for nested keys (e.g., llm.apiKey)
|
|
2446
|
-
const SETTABLE_KEYS = {
|
|
2447
|
-
apiKey: 'WebPeel API key (tip: use `webpeel auth <key>` to set and verify in one step)',
|
|
2448
|
-
braveApiKey: 'Brave Search API key',
|
|
2449
|
-
'llm.apiKey': 'LLM API key for AI-powered extraction (OpenAI-compatible)',
|
|
2450
|
-
'llm.model': 'LLM model name (default: gpt-4o-mini)',
|
|
2451
|
-
'llm.baseUrl': 'LLM API base URL (default: https://api.openai.com/v1)',
|
|
2452
|
-
};
|
|
2453
|
-
/**
 * Render a config value for display, hiding secrets.
 *
 * @param {string} k - Config key name (dot-notation for nested keys).
 * @param {*} v - The stored value; any falsy value is shown as "(not set)".
 * @returns {string} "(not set)", the plain value for non-secret keys, or a
 *   masked form for `apiKey`, `braveApiKey` and `llm.apiKey`.
 */
const maskSecret = (k, v) => {
    if (!v) {
        return '(not set)';
    }
    // Coerce first: a hand-edited config may hold a non-string value, and
    // `.slice` only exists on strings.
    const text = String(v);
    const isSecret = k === 'apiKey' || k === 'braveApiKey' || k === 'llm.apiKey';
    if (!isSecret) {
        return text;
    }
    // Head (4) + tail (4) would print the entire secret when it has 8
    // characters or fewer, so such values are hidden completely.
    if (text.length <= 8) {
        return '****';
    }
    return `${text.slice(0, 4)}...${text.slice(-4)}`;
};
|
|
2461
|
-
/**
 * Get a potentially nested value using dot-notation (e.g., "llm.apiKey").
 *
 * Only own properties are followed, so names that exist solely on the
 * prototype chain ("constructor", "__proto__", "toString", ...) resolve to
 * `undefined` instead of leaking built-ins.
 *
 * @param {object} obj - Root object to read from.
 * @param {string} path - Dot-separated property path.
 * @returns {*} The value at `path`, or `undefined` when any segment is missing
 *   or a non-object is reached before the path ends.
 */
function getNestedValue(obj, path) {
    let cur = obj;
    for (const part of path.split('.')) {
        if (cur == null || typeof cur !== 'object' || !Object.hasOwn(cur, part)) {
            return undefined;
        }
        cur = cur[part];
    }
    return cur;
}
|
|
2472
|
-
/**
 * Set a potentially nested value using dot-notation (e.g., "llm.apiKey").
 *
 * Missing or non-object intermediate segments are replaced by fresh objects.
 * Path segments that could reach or replace a prototype are rejected, so a
 * path such as "__proto__.x" can never write to `Object.prototype`.
 *
 * @param {object} obj - Root object to mutate.
 * @param {string} path - Dot-separated property path.
 * @param {*} val - Value to store at `path`.
 * @throws {TypeError} When a segment is "__proto__", "constructor" or "prototype".
 */
function setNestedValue(obj, path, val) {
    const forbidden = new Set(['__proto__', 'constructor', 'prototype']);
    const parts = path.split('.');
    const unsafe = parts.find((part) => forbidden.has(part));
    if (unsafe !== undefined) {
        throw new TypeError(`Unsafe config path segment: "${unsafe}"`);
    }
    const leaf = parts.pop();
    let cur = obj;
    for (const part of parts) {
        // Only reuse an intermediate object the container actually owns;
        // anything inherited, null or primitive is replaced.
        const existing = Object.hasOwn(cur, part) ? cur[part] : undefined;
        if (existing == null || typeof existing !== 'object') {
            cur[part] = {};
        }
        cur = cur[part];
    }
    cur[leaf] = val;
}
|
|
2484
|
-
// Dispatch on the first positional argument:
//   (none) | list       -> print an overview of the configuration and cache
//   set <key> <value>   -> store one of the SETTABLE_KEYS
//   get <key>           -> print one value (secrets masked)
//   <key>               -> legacy shorthand for `get <key>`
// Every branch ends the process explicitly.
if (!action || action === 'list') {
    // Show all config (also triggered by `webpeel config list`)
    console.log('WebPeel CLI Configuration');
    console.log(` Config file: ~/.webpeel/config.json`);
    console.log('');
    console.log(` apiKey: ${maskSecret('apiKey', config.apiKey)}`);
    console.log(` braveApiKey: ${maskSecret('braveApiKey', config.braveApiKey)}`);
    console.log(` planTier: ${config.planTier || 'free'}`);
    console.log(` anonymousUsage: ${config.anonymousUsage}`);
    console.log('');
    console.log(' LLM:');
    console.log(` llm.apiKey: ${maskSecret('llm.apiKey', config.llm?.apiKey)}`);
    console.log(` llm.model: ${config.llm?.model || '(not set, default: gpt-4o-mini)'}`);
    console.log(` llm.baseUrl: ${config.llm?.baseUrl || '(not set, default: https://api.openai.com/v1)'}`);
    const stats = cacheStats();
    console.log('');
    console.log(' Cache:');
    console.log(` entries: ${stats.entries}`);
    console.log(` size: ${(stats.sizeBytes / 1024).toFixed(1)} KB`);
    console.log(` dir: ${stats.dir}`);
    console.log('');
    console.log(' Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
    console.log(' Usage: webpeel config set <key> <value>');
    if (!config.apiKey) {
        console.log('');
        console.log(' Tip: Run `webpeel auth <your-key>` to set and verify your API key.');
        console.log(' Get a free key at: https://app.webpeel.dev/keys');
    }
    process.exit(0);
}
if (action === 'set') {
    if (!key) {
        console.error('Usage: webpeel config set <key> <value>');
        console.error('Settable keys: ' + Object.keys(SETTABLE_KEYS).join(', '));
        process.exit(1);
    }
    // Own-property test: the `in` operator would also accept inherited
    // names such as "constructor" or "toString".
    if (!Object.hasOwn(SETTABLE_KEYS, key)) {
        console.error(`Cannot set "${key}". Settable keys: ${Object.keys(SETTABLE_KEYS).join(', ')}`);
        process.exit(1);
    }
    if (!value) {
        console.error(`Usage: webpeel config set ${key} <value>`);
        process.exit(1);
    }
    setNestedValue(config, key, value);
    saveConfig(config);
    console.log(`✓ ${key} saved`);
    process.exit(0);
}
// Dot-path lookup first; then the name as one literal key, restricted to the
// config's own properties so inherited members are never reported as values.
const readConfigValue = (name) => getNestedValue(config, name)
    ?? (Object.hasOwn(config, name) ? config[name] : undefined);
if (action === 'get') {
    const lookupKey = key || '';
    if (!lookupKey) {
        console.error('Usage: webpeel config get <key>');
        process.exit(1);
    }
    const val = readConfigValue(lookupKey);
    if (val !== undefined) {
        console.log(maskSecret(lookupKey, String(val)));
    }
    else {
        console.error(`Unknown config key: ${lookupKey}`);
        process.exit(1);
    }
    process.exit(0);
}
// Legacy: `webpeel config <key>` — treat action as the key name
const val = readConfigValue(action);
if (val !== undefined) {
    console.log(maskSecret(action, String(val)));
}
else {
    console.error(`Unknown config key or action: ${action}`);
    console.error('Usage: webpeel config [get|set] [key] [value]');
    process.exit(1);
}
process.exit(0);
|
|
2556
|
-
});
|
|
2557
|
-
// Cache management command
//
// `stats` prints the entry count, size and location; `clear` calls
// clearCache(false); `purge` calls clearCache(true). An unrecognised action
// prints an error and exits 1; the recognised ones exit 0.
program
    .command('cache')
    .description('Manage the local response cache')
    .argument('<action>', '"stats", "clear", or "purge" (clear expired / clear all)')
    .action(async (action) => {
        const handlers = new Map([
            ['stats', () => {
                const { entries, sizeBytes, dir } = cacheStats();
                console.log(`Cache: ${entries} entries, ${(sizeBytes / 1024).toFixed(1)} KB`);
                console.log(`Location: ${dir}`);
            }],
            ['clear', () => {
                const removed = clearCache(false);
                console.log(`Cleared ${removed} expired cache entries.`);
            }],
            ['purge', () => {
                const removed = clearCache(true);
                console.log(`Purged all ${removed} cache entries.`);
            }],
        ]);
        const handler = handlers.get(action);
        if (handler) {
            handler();
        }
        else {
            console.error('Unknown cache action. Use: stats, clear, or purge');
            process.exit(1);
        }
        process.exit(0);
    });
|
|
2586
|
-
// Brand command - extract branding/design system
//
// Fetches the URL with three CSS selectors, then prints a JSON document built
// from the result plus extractColors()/extractFonts() run over its content.
// Output is always JSON; the `--json` flag is accepted but not consulted.
program
    .command('brand <url>')
    .description('Extract branding and design system from a URL')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON (default)')
    .action(async (url, options) => {
        const spinner = options.silent ? null : ora('Extracting branding...').start();
        try {
            const selectors = {
                primaryColor: 'meta[name="theme-color"]',
                title: 'title',
                logo: 'img[class*="logo"], img[alt*="logo"]',
            };
            const page = await peel(url, { extract: { selectors } });
            spinner?.succeed(`Extracted branding in ${page.elapsed}ms`);
            // Assemble the report from the fetched page and its metadata
            const report = {
                url: page.url,
                title: page.title,
                colors: extractColors(page.content),
                fonts: extractFonts(page.content),
                extracted: page.extracted,
                metadata: page.metadata,
            };
            console.log(JSON.stringify(report, null, 2));
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            spinner?.fail('Branding extraction failed');
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
2628
|
-
// Track command - track changes on a URL
//
// Fetches the URL with change tracking enabled and reports the fingerprint,
// token count and change status, either as JSON (--json) or as plain lines.
program
    .command('track <url>')
    .description('Track changes on a URL (saves snapshot for use with `webpeel diff`)')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .option('-r, --render', 'Use browser rendering')
    .action(async (url, options) => {
        const spinner = options.silent ? null : ora('Fetching and tracking...').start();
        const nowIso = () => new Date().toISOString();
        try {
            // changeTracking: true saves the snapshot to ~/.webpeel/snapshots/ so that
            // `webpeel diff` can compare against it later.
            const fetchOptions = {
                render: options.render || false,
                changeTracking: true,
            };
            const page = await peel(url, fetchOptions);
            spinner?.succeed(`Tracked in ${page.elapsed}ms`);
            const tracking = page.changeTracking;
            const changeStatus = tracking?.changeStatus ?? 'new';
            const previousScrapeAt = tracking?.previousScrapeAt ?? null;
            if (!options.json) {
                const lines = [
                    `URL: ${page.url}`,
                    `Title: ${page.title}`,
                    `Fingerprint: ${page.fingerprint}`,
                    `Tokens: ${page.tokens}`,
                    `Status: ${changeStatus}`,
                ];
                if (previousScrapeAt) {
                    lines.push(`Previous check: ${previousScrapeAt}`);
                }
                lines.push(`Last checked: ${nowIso()}`);
                lines.push('\nSnapshot saved. Run `webpeel diff <url> --last` to compare future changes.');
                for (const line of lines) {
                    console.log(line);
                }
            }
            else {
                const payload = {
                    url: page.url,
                    title: page.title,
                    fingerprint: page.fingerprint,
                    tokens: page.tokens,
                    contentType: page.contentType,
                    changeStatus,
                    previousScrapeAt,
                    lastChecked: nowIso(),
                };
                await writeStdout(JSON.stringify(payload, null, 2) + '\n');
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            spinner?.fail('Tracking failed');
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
2683
|
-
// Summarize command - AI-powered summary
//
// Requires an LLM key (--llm-key or OPENAI_API_KEY); without one it prints an
// error and exits 1 before fetching. The summary is whatever peel() returns
// in `extracted` for the given prompt.
program
    .command('summarize <url>')
    .description('Generate an AI-powered summary of a URL')
    .option('--llm-key <key>', 'LLM API key (or use OPENAI_API_KEY env var)')
    .option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
    .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
    .option('--prompt <prompt>', 'Custom summary prompt')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output as JSON')
    .action(async (url, options) => {
        const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
        if (!llmApiKey) {
            console.error('Error: --llm-key or OPENAI_API_KEY environment variable is required');
            process.exit(1);
        }
        const spinner = options.silent ? null : ora('Fetching and summarizing...').start();
        try {
            const extract = {
                prompt: options.prompt || 'Summarize this webpage in 2-3 sentences.',
                llmApiKey,
                llmModel: options.llmModel || 'gpt-4o-mini',
                llmBaseUrl: options.llmBaseUrl || 'https://api.openai.com/v1',
            };
            const page = await peel(url, { extract });
            spinner?.succeed(`Summarized in ${page.elapsed}ms`);
            if (!options.json) {
                console.log(`\n${page.title}\n`);
                console.log(page.extracted);
            }
            else {
                const payload = {
                    url: page.url,
                    title: page.title,
                    summary: page.extracted,
                };
                console.log(JSON.stringify(payload, null, 2));
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            spinner?.fail('Summary generation failed');
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
2734
|
-
// Agent command - autonomous web research
|
|
2735
|
-
program
|
|
2736
|
-
.command('agent <prompt>')
|
|
2737
|
-
.description('Web research agent — LLM-free by default, add --llm-key for AI synthesis')
|
|
2738
|
-
.option('--llm-key <key>', 'LLM API key (or use OPENAI_API_KEY env var)')
|
|
2739
|
-
.option('--llm-model <model>', 'LLM model to use (default: gpt-4o-mini)')
|
|
2740
|
-
.option('--llm-base-url <url>', 'LLM API base URL')
|
|
2741
|
-
.option('--urls <urls>', 'Comma-separated starting URLs')
|
|
2742
|
-
.option('--max-pages <n>', 'Maximum pages to visit (default: 10)', '10')
|
|
2743
|
-
.option('--schema <json>', 'Schema template name (e.g. product, article) or JSON schema for structured output')
|
|
2744
|
-
.option('-s, --silent', 'Silent mode (no spinner)')
|
|
2745
|
-
.option('--json', 'Output as JSON')
|
|
2746
|
-
.action(async (prompt, options) => {
|
|
2747
|
-
const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
|
|
2748
|
-
const urls = options.urls ? options.urls.split(',').map((u) => u.trim()) : undefined;
|
|
2749
|
-
// Parse schema (support templates)
|
|
2750
|
-
let schema;
|
|
2751
|
-
if (options.schema) {
|
|
2752
|
-
const template = getSchemaTemplate(options.schema);
|
|
2753
|
-
if (template) {
|
|
2754
|
-
schema = template.fields;
|
|
2755
|
-
}
|
|
2756
|
-
else {
|
|
2757
|
-
try {
|
|
2758
|
-
schema = JSON.parse(options.schema);
|
|
2759
|
-
}
|
|
2760
|
-
catch {
|
|
2761
|
-
console.error(`Error: --schema must be a template name (${listSchemaTemplates().join(', ')}) or valid JSON`);
|
|
2762
|
-
process.exit(1);
|
|
2763
|
-
}
|
|
2764
|
-
}
|
|
2765
|
-
}
|
|
2766
|
-
if (llmApiKey) {
|
|
2767
|
-
// Full LLM agent mode (existing code)
|
|
2768
|
-
const spinner = options.silent ? null : ora('Running agent research...').start();
|
|
2769
|
-
try {
|
|
2770
|
-
const { runAgent } = await import('./core/agent.js');
|
|
2771
|
-
const result = await runAgent({
|
|
2772
|
-
prompt,
|
|
2773
|
-
urls,
|
|
2774
|
-
schema,
|
|
2775
|
-
llmApiKey,
|
|
2776
|
-
llmModel: options.llmModel,
|
|
2777
|
-
llmApiBase: options.llmBaseUrl,
|
|
2778
|
-
maxPages: parseInt(options.maxPages, 10),
|
|
2779
|
-
onProgress: (progress) => {
|
|
2780
|
-
if (spinner)
|
|
2781
|
-
spinner.text = progress.message;
|
|
2782
|
-
},
|
|
2783
|
-
});
|
|
2784
|
-
if (spinner)
|
|
2785
|
-
spinner.succeed(`Agent finished: ${result.pagesVisited} pages`);
|
|
2786
|
-
if (options.json) {
|
|
2787
|
-
console.log(JSON.stringify(result, null, 2));
|
|
2788
|
-
}
|
|
2789
|
-
else {
|
|
2790
|
-
console.log(`\nSources (${result.sources.length}):`);
|
|
2791
|
-
result.sources.forEach(s => console.log(` • ${s}`));
|
|
2792
|
-
console.log(`\nResults:`);
|
|
2793
|
-
console.log(JSON.stringify(result.data, null, 2));
|
|
2794
|
-
}
|
|
2795
|
-
await cleanup();
|
|
2796
|
-
process.exit(0);
|
|
2797
|
-
}
|
|
2798
|
-
catch (e) {
|
|
2799
|
-
if (spinner)
|
|
2800
|
-
spinner.fail('Agent failed');
|
|
2801
|
-
console.error(e instanceof Error ? e.message : e);
|
|
2802
|
-
await cleanup();
|
|
2803
|
-
process.exit(1);
|
|
2804
|
-
}
|
|
2805
|
-
}
|
|
2806
|
-
else {
|
|
2807
|
-
// LLM-free mode: search + fetch + BM25 extraction
|
|
2808
|
-
const spinner = options.silent ? null : ora('Running LLM-free research...').start();
|
|
2809
|
-
try {
|
|
2810
|
-
// Import needed modules
|
|
2811
|
-
const { quickAnswer } = await import('./core/quick-answer.js');
|
|
2812
|
-
// Step 1: Get URLs to process
|
|
2813
|
-
let targetUrls = urls || [];
|
|
2814
|
-
// If no URLs, search the web
|
|
2815
|
-
if (targetUrls.length === 0) {
|
|
2816
|
-
if (spinner)
|
|
2817
|
-
spinner.text = 'Searching the web...';
|
|
2818
|
-
try {
|
|
2819
|
-
const { getBestSearchProvider } = await import('./core/search-provider.js');
|
|
2820
|
-
const { provider, apiKey: searchApiKey } = getBestSearchProvider();
|
|
2821
|
-
const searchResults = await provider.searchWeb(prompt, {
|
|
2822
|
-
count: Math.min(parseInt(options.maxPages, 10) || 5, 10),
|
|
2823
|
-
apiKey: searchApiKey,
|
|
2824
|
-
});
|
|
2825
|
-
targetUrls = searchResults.map((r) => r.url);
|
|
2826
|
-
}
|
|
2827
|
-
catch {
|
|
2828
|
-
// Fallback: try DuckDuckGo HTML
|
|
2829
|
-
if (spinner)
|
|
2830
|
-
spinner.text = 'Searching via DuckDuckGo...';
|
|
2831
|
-
try {
|
|
2832
|
-
const duckUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(prompt)}`;
|
|
2833
|
-
const searchResult = await peel(duckUrl, { budget: 4000 });
|
|
2834
|
-
// Extract URLs from search results content
|
|
2835
|
-
const urlMatches = searchResult.content.match(/https?:\/\/[^\s\)]+/g) || [];
|
|
2836
|
-
targetUrls = urlMatches
|
|
2837
|
-
.filter((u) => !u.includes('duckduckgo.com'))
|
|
2838
|
-
.slice(0, parseInt(options.maxPages, 10) || 5);
|
|
2839
|
-
}
|
|
2840
|
-
catch {
|
|
2841
|
-
// No search results
|
|
2842
|
-
}
|
|
2843
|
-
}
|
|
2844
|
-
}
|
|
2845
|
-
if (targetUrls.length === 0) {
|
|
2846
|
-
if (spinner)
|
|
2847
|
-
spinner.fail('No URLs found. Provide --urls or a more specific prompt.');
|
|
2848
|
-
process.exit(1);
|
|
2849
|
-
}
|
|
2850
|
-
if (spinner)
|
|
2851
|
-
spinner.text = `Processing ${targetUrls.length} pages...`;
|
|
2852
|
-
// Step 2: Fetch and extract from each URL
|
|
2853
|
-
const results = [];
|
|
2854
|
-
for (const url of targetUrls) {
|
|
2855
|
-
try {
|
|
2856
|
-
if (spinner)
|
|
2857
|
-
spinner.text = `Fetching: ${url.substring(0, 60)}...`;
|
|
2858
|
-
const pageResult = await peel(url, { budget: 4000 });
|
|
2859
|
-
let extracted = null;
|
|
2860
|
-
let confidence = 0;
|
|
2861
|
-
if (schema) {
|
|
2862
|
-
// Extract each schema field using smartExtractSchemaFields
|
|
2863
|
-
const { smartExtractSchemaFields: smartExtractResearch } = await import('./core/schema-postprocess.js');
|
|
2864
|
-
extracted = smartExtractResearch(pageResult.content, schema, quickAnswer, {
|
|
2865
|
-
pageTitle: pageResult.title,
|
|
2866
|
-
pageUrl: url,
|
|
2867
|
-
metadata: pageResult.metadata,
|
|
2868
|
-
});
|
|
2869
|
-
// Calculate confidence from quickAnswer for any field
|
|
2870
|
-
for (const question of Object.values(schema)) {
|
|
2871
|
-
try {
|
|
2872
|
-
const qa = quickAnswer({ content: pageResult.content, question: typeof question === 'string' ? question : '' });
|
|
2873
|
-
confidence = Math.max(confidence, qa.confidence || 0);
|
|
2874
|
-
}
|
|
2875
|
-
catch { /* ignore */ }
|
|
2876
|
-
break; // just need one confidence estimate
|
|
2877
|
-
}
|
|
2878
|
-
}
|
|
2879
|
-
else {
|
|
2880
|
-
// Answer the prompt directly
|
|
2881
|
-
try {
|
|
2882
|
-
const qa = quickAnswer({ content: pageResult.content, question: prompt });
|
|
2883
|
-
extracted = { answer: qa.answer || '' };
|
|
2884
|
-
confidence = qa.confidence || 0;
|
|
2885
|
-
}
|
|
2886
|
-
catch {
|
|
2887
|
-
extracted = null;
|
|
2888
|
-
}
|
|
2889
|
-
}
|
|
2890
|
-
results.push({
|
|
2891
|
-
url,
|
|
2892
|
-
title: pageResult.metadata?.title || url,
|
|
2893
|
-
extracted,
|
|
2894
|
-
content: pageResult.content.substring(0, 500),
|
|
2895
|
-
confidence,
|
|
2896
|
-
});
|
|
2897
|
-
}
|
|
2898
|
-
catch (e) {
|
|
2899
|
-
// Skip failed URLs
|
|
2900
|
-
if (process.env.DEBUG) {
|
|
2901
|
-
console.debug('[webpeel]', `Failed to fetch ${url}:`, e instanceof Error ? e.message : e);
|
|
2902
|
-
}
|
|
2903
|
-
}
|
|
2904
|
-
}
|
|
2905
|
-
if (spinner)
|
|
2906
|
-
spinner.succeed(`Processed ${results.length}/${targetUrls.length} pages (LLM-free)`);
|
|
2907
|
-
if (options.json) {
|
|
2908
|
-
console.log(JSON.stringify({
|
|
2909
|
-
mode: 'llm-free',
|
|
2910
|
-
prompt,
|
|
2911
|
-
schema: schema || null,
|
|
2912
|
-
results,
|
|
2913
|
-
sources: results.map(r => r.url),
|
|
2914
|
-
pagesVisited: results.length,
|
|
2915
|
-
}, null, 2));
|
|
2916
|
-
}
|
|
2917
|
-
else {
|
|
2918
|
-
console.log(`\n📊 Results (${results.length} pages, LLM-free):\n`);
|
|
2919
|
-
for (const r of results) {
|
|
2920
|
-
console.log(`── ${r.title} ──`);
|
|
2921
|
-
console.log(` ${r.url}`);
|
|
2922
|
-
if (r.extracted) {
|
|
2923
|
-
for (const [k, v] of Object.entries(r.extracted)) {
|
|
2924
|
-
if (v)
|
|
2925
|
-
console.log(` ${k}: ${v}`);
|
|
2926
|
-
}
|
|
2927
|
-
}
|
|
2928
|
-
console.log(` Confidence: ${(r.confidence * 100).toFixed(0)}%\n`);
|
|
2929
|
-
}
|
|
2930
|
-
}
|
|
2931
|
-
await cleanup();
|
|
2932
|
-
process.exit(0);
|
|
2933
|
-
}
|
|
2934
|
-
catch (e) {
|
|
2935
|
-
if (spinner)
|
|
2936
|
-
spinner.fail('Research failed');
|
|
2937
|
-
console.error(e instanceof Error ? e.message : e);
|
|
2938
|
-
await cleanup();
|
|
2939
|
-
process.exit(1);
|
|
2940
|
-
}
|
|
2941
|
-
}
|
|
2942
|
-
});
|
|
2943
|
-
// ── Jobs command group ─────────────────────────────────────────────────────
// Parent `jobs` command. Called with bare keywords it behaves like
// `jobs search <keywords>`; called without any it prints its help and exits.
const jobsCmd = program
    .command('jobs')
    .description('Job board operations: search listings and auto-apply (LinkedIn, Indeed, Glassdoor, Upwork)')
    .argument('[keywords]', 'Search keywords — shorthand for "jobs search <keywords>"')
    .option('-l, --location <location>', 'Location filter')
    .option('-s, --source <source>', 'Job board: glassdoor, indeed, linkedin, or upwork (default: linkedin)', 'linkedin')
    .option('-n, --limit <number>', 'Max results (default: 25)', '25')
    .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
    .option('--json', 'Output raw JSON')
    .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
    .option('--silent', 'Silent mode (no spinner)')
    .action(async (keywords, options) => {
        if (keywords) {
            // Shorthand form: hand off to the search routine shared with `jobs search`.
            await runJobSearch(keywords, options);
            return;
        }
        // Nothing to search for: show usage instead.
        jobsCmd.help();
        process.exit(0);
    });
|
|
2964
|
-
// ── Shared job-search logic (used by both `jobs` default and `jobs search`) ───
/**
 * Run a job-board search, print the outcome and terminate the process.
 *
 * With `options.json` the raw result of `searchJobs` is written to stdout as
 * JSON. Otherwise a summary table is printed, followed by a detail section
 * for every returned job that carries a `description` property.
 *
 * @param {string} keywords - Search keywords forwarded to `searchJobs`.
 * @param {object} options - Parsed CLI options: `location`, `source`, `limit`,
 *   `details`, `timeout`, `json`, `silent`.
 * @returns {Promise<void>} Every path ends in `process.exit` (0 on success,
 *   1 when anything throws).
 */
async function runJobSearch(keywords, options) {
    const spinner = options.silent ? null : ora('Searching jobs...').start();
    try {
        const { searchJobs } = await import('./core/jobs.js');
        const VALID_SOURCES = ['glassdoor', 'indeed', 'linkedin', 'upwork'];
        // Resolve the default first so a missing option yields 'linkedin'
        // rather than passing the validity check and then forwarding `undefined`.
        const requestedSource = options.source ?? 'linkedin';
        const source = VALID_SOURCES.includes(requestedSource) ? requestedSource : 'linkedin';
        // Clamp numeric options: limit to 1..100, details to 0..limit.
        const limit = Math.min(Math.max(parseInt(options.limit ?? '25', 10) || 25, 1), 100);
        const fetchDetails = Math.min(Math.max(parseInt(options.details ?? '0', 10) || 0, 0), limit);
        const timeout = parseInt(options.timeout ?? '30000', 10) || 30000;
        const result = await searchJobs({
            keywords,
            location: options.location,
            source,
            limit,
            fetchDetails,
            timeout,
        });
        if (spinner)
            spinner.stop();
        if (options.json) {
            await writeStdout(JSON.stringify(result, null, 2) + '\n');
            process.exit(0);
        }
        // Round DOWN to whole thousands: "k+" means "at least", so 1500 must
        // read "1k+", not "2k+".
        const totalLabel = result.totalFound >= 1000
            ? `${Math.floor(result.totalFound / 1000)}k+`
            : String(result.totalFound);
        const locationLabel = options.location ? ` in ${options.location}` : '';
        console.log(`\n🔍 Found ${totalLabel} ${keywords} jobs${locationLabel} (${result.source})\n`);
        if (result.jobs.length === 0) {
            console.log(' No jobs found.\n');
            process.exit(0);
        }
        // Column widths of the summary table.
        const colNum = 3;
        const colTitle = 40;
        const colCompany = 18;
        const colLocation = 16;
        const colSalary = 14;
        const colPosted = 10;
        // Fit a cell to width `w`: truncate with an ellipsis or right-pad.
        // Missing or non-string values are coerced so one incomplete listing
        // cannot abort the whole table.
        const pad = (s, w) => {
            const text = String(s ?? '');
            return text.length > w ? text.slice(0, w - 1) + '…' : text.padEnd(w);
        };
        const rpad = (s, w) => String(s ?? '').padStart(w);
        console.log(` ${rpad('#', colNum)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Location', colLocation)} ${pad('Salary/Budget', colSalary)} ${pad('Posted', colPosted)}`);
        result.jobs.forEach((job, i) => {
            const titleStr = job.title + (job.remote ? ' 🏠' : '');
            // Prefer `salary`; fall back to `budget` when the listing has one.
            const salaryStr = job.salary ?? ('budget' in job ? job.budget : '') ?? '';
            console.log(` ${rpad(String(i + 1), colNum)} ${pad(titleStr, colTitle)} ${pad(job.company, colCompany)} ${pad(job.location, colLocation)} ${pad(salaryStr, colSalary)} ${pad(job.postedAt ?? '', colPosted)}`);
        });
        const timeSec = (result.timeTakenMs / 1000).toFixed(1);
        const detailsNote = fetchDetails > 0 ? ` | Details: ${result.detailsFetched} fetched` : '';
        console.log(`\nFetched ${result.jobs.length} jobs in ${timeSec}s${detailsNote}\n`);
        // Detail sections are printed only for jobs that have a `description` key.
        const detailedJobs = result.jobs.filter((j) => 'description' in j);
        for (let i = 0; i < detailedJobs.length; i++) {
            const job = detailedJobs[i];
            console.log(`━━━ Job #${i + 1}: ${job.title} ━━━`);
            const metaParts = [`Company: ${job.company}`, `Location: ${job.location}`];
            if (job.salary)
                metaParts.push(`Salary: ${job.salary}`);
            console.log(metaParts.join(' | '));
            const typeParts = [];
            if (job.employmentType)
                typeParts.push(`Type: ${job.employmentType}`);
            if (job.experienceLevel)
                typeParts.push(`Level: ${job.experienceLevel}`);
            if (job.postedAt)
                typeParts.push(`Posted: ${job.postedAt}`);
            if (typeParts.length > 0)
                console.log(typeParts.join(' | '));
            if (job.description) {
                // Only the first 500 characters are shown, with continuation lines indented.
                console.log(`\nDescription:\n ${job.description.slice(0, 500).replace(/\n/g, '\n ')}`);
            }
            if (job.requirements && job.requirements.length > 0) {
                console.log(`\nRequirements:`);
                job.requirements.forEach(r => console.log(` • ${r}`));
            }
            if (job.responsibilities && job.responsibilities.length > 0) {
                console.log(`\nResponsibilities:`);
                job.responsibilities.forEach(r => console.log(` • ${r}`));
            }
            if (job.benefits && job.benefits.length > 0) {
                console.log(`\nBenefits:`);
                job.benefits.forEach(b => console.log(` • ${b}`));
            }
            if (job.applyUrl) {
                console.log(`\nApply: ${job.applyUrl}`);
            }
            console.log('');
        }
        process.exit(0);
    }
    catch (error) {
        if (spinner)
            spinner.fail?.('Job search failed');
        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        process.exit(1);
    }
}
|
|
3062
|
-
// jobs search <keywords> — explicit subcommand (same logic as default action)
// Registers the same option set as the parent `jobs` command and forwards
// straight to runJobSearch.
jobsCmd
    .command('search <keywords>')
    .description('Search job boards for listings (LinkedIn, Indeed, Glassdoor, Upwork)')
    .alias('s')
    .option('-l, --location <location>', 'Location filter')
    .option('-s, --source <source>', 'Job board: glassdoor, indeed, linkedin, or upwork (default: linkedin)', 'linkedin')
    .option('-n, --limit <number>', 'Max results (default: 25)', '25')
    .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
    .option('--json', 'Output raw JSON')
    .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
    .option('--silent', 'Silent mode (no spinner)')
    .action((keywords, options) => runJobSearch(keywords, options));
|
|
3077
|
-
// ── jobs apply <url> ─────────────────────────────────────────────────────────
// Stealth automated job application using human behavior simulation.
//
// Flow: load the applicant profile JSON, optionally override its resume path,
// call `applyToJob` from ./core/apply.js, then report the result either as
// JSON (--json) or as a human-readable summary. The exit code is 1 when the
// result carries an `error` or when anything throws, otherwise 0.
jobsCmd
    .command('apply <url>')
    .description('Stealth automated job application using human behavior simulation')
    // HOME is not set on every platform (e.g. Windows); fall back to
    // USERPROFILE before resorting to a literal '~', which readFileSync
    // would not expand.
    .option('--profile <path>', 'Path to profile JSON file', `${process.env.HOME ?? process.env.USERPROFILE ?? '~'}/.webpeel/profile.json`)
    .option('--resume <path>', 'Path to resume PDF (overrides profile.resumePath)')
    .option('--mode <mode>', 'Submission mode: auto | review | dry-run (default: review)', 'review')
    .option('--session-dir <path>', 'Browser session directory (preserves login cookies)')
    .option('--llm-key <key>', 'LLM API key for custom question answers')
    .option('--llm-provider <name>', 'LLM provider: openai | anthropic (default: openai)', 'openai')
    .option('--daily-limit <n>', 'Max applications per day (default: 8)', '8')
    .option('--no-warmup', 'Skip browsing warmup phase')
    .option('--json', 'Output result as JSON')
    .option('--silent', 'Minimal output')
    .action(async (url, options) => {
        const isSilent = options.silent;
        const isJson = options.json;
        // Unknown modes fall back to the default, 'review'.
        const mode = ['auto', 'review', 'dry-run'].includes(options.mode)
            ? options.mode
            : 'review';
        // The banner goes to stdout, so it is skipped in --json mode to keep
        // stdout a single parseable JSON document.
        if (!isSilent && !isJson) {
            console.log(`\n🤖 WebPeel Auto-Apply — mode: ${mode}`);
            console.log(` URL: ${url}\n`);
        }
        // Load profile
        const profilePath = options.profile;
        let profile;
        try {
            profile = JSON.parse(readFileSync(profilePath, 'utf-8'));
            // A profile that parses to null, a primitive or an array cannot
            // carry the fields assigned and read below; treat it as unloadable.
            if (profile === null || typeof profile !== 'object' || Array.isArray(profile)) {
                throw new TypeError('Profile must be a JSON object');
            }
        }
        catch {
            console.error(`Error: Could not load profile from ${profilePath}`);
            console.error(`Run "webpeel jobs apply-setup" to create a profile.`);
            process.exit(1);
        }
        if (options.resume) {
            profile.resumePath = options.resume;
        }
        const spinner = isSilent ? null : ora('Applying...').start();
        try {
            const { applyToJob } = await import('./core/apply.js');
            const result = await applyToJob({
                url,
                profile,
                mode,
                sessionDir: options.sessionDir,
                llmKey: options.llmKey,
                llmProvider: options.llmProvider,
                dailyLimit: parseInt(options.dailyLimit, 10) || 8,
                // Commander sets `warmup` to false only when --no-warmup is passed.
                warmup: options.warmup !== false,
                onProgress: isSilent
                    ? undefined
                    : (event) => {
                        if (spinner)
                            spinner.text = `[${event.stage}] ${event.message}`;
                        else
                            console.log(` [${event.stage}] ${event.message}`);
                    },
            });
            if (spinner)
                spinner.stop();
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
                process.exit(result.error ? 1 : 0);
            }
            const statusIcon = result.submitted ? '✅' : result.error ? '❌' : '📋';
            console.log(`\n${statusIcon} ${result.submitted
                ? 'Application submitted!'
                : result.error
                    ? `Error: ${result.error}`
                    : 'Application completed (not submitted)'}`);
            if (result.job.title || result.job.company) {
                console.log(` ${result.job.title}${result.job.company ? ` @ ${result.job.company}` : ''}`);
            }
            console.log(`\n Fields filled: ${result.fieldsFilled}`);
            if (result.llmAnswers > 0)
                console.log(` LLM answers: ${result.llmAnswers}`);
            if (result.fieldsSkipped.length > 0)
                console.log(` Skipped: ${result.fieldsSkipped.join(', ')}`);
            if (result.warnings.length > 0 && !isSilent) {
                console.log(`\n Warnings:`);
                result.warnings.forEach(w => console.log(` ⚠️ ${w}`));
            }
            console.log(` Time: ${(result.elapsed / 1000).toFixed(1)}s\n`);
            process.exit(result.error ? 1 : 0);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Application failed');
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
3172
|
-
// ── jobs apply-setup ─────────────────────────────────────────────────────────
// Interactive wizard to create ~/.webpeel/profile.json
//
// Asks a fixed series of questions on stdin, assembles the answers into a
// profile object and writes it as pretty-printed JSON to
// <home>/.webpeel/profile.json, creating the directory if necessary.
jobsCmd
    .command('apply-setup')
    .description('Interactive setup wizard — creates ~/.webpeel/profile.json')
    .action(async () => {
        const { createInterface } = await import('readline');
        const rl = createInterface({ input: process.stdin, output: process.stdout });
        // Promise wrapper around rl.question that resolves with the trimmed answer.
        const ask = (q) => new Promise(resolve => rl.question(q, ans => resolve(ans.trim())));
        // Parse a salary answer, tolerating "$", thousands separators and
        // spaces ("$120,000" → 120000). Yields NaN for anything non-numeric.
        const parseSalary = (text) => Number.parseInt(text.replace(/[$,\s]/g, ''), 10);
        console.log('\n🤖 WebPeel Apply Setup — Create your applicant profile\n');
        console.log('This creates ~/.webpeel/profile.json used by "webpeel jobs apply".\n');
        try {
            const name = await ask('Full name: ');
            const email = await ask('Email address: ');
            const phone = await ask('Phone number: ');
            const linkedin = await ask('LinkedIn URL (optional, press Enter to skip): ');
            const website = await ask('Portfolio/website URL (optional): ');
            const location = await ask('City, State (e.g. San Francisco, CA): ');
            const workAuth = await ask('Work authorization (e.g. US Citizen, Permanent Resident, H-1B, Need Sponsorship): ');
            const yearsExp = await ask('Years of experience: ');
            const currentTitle = await ask('Current/most recent job title: ');
            const skills = await ask('Skills (comma-separated, e.g. TypeScript, React, Node.js): ');
            const education = await ask('Education (e.g. B.S. Computer Science, MIT): ');
            const resumePath = await ask('Path to resume PDF (e.g. /Users/you/resume.pdf): ');
            const summary = await ask('Professional summary (1-3 sentences): ');
            const salaryMin = await ask('Minimum desired salary (optional, e.g. 120000): ');
            const salaryMax = await ask('Maximum desired salary (optional, e.g. 180000): ');
            const relocate = await ask('Willing to relocate? (y/n): ');
            const sponsorship = await ask('Need visa sponsorship? (y/n): ');
            rl.close();
            // Record a salary range only when BOTH bounds are real numbers;
            // a NaN bound would otherwise be serialised as `null`.
            const minSalary = parseSalary(salaryMin);
            const maxSalary = parseSalary(salaryMax);
            const hasSalaryRange = Number.isFinite(minSalary) && Number.isFinite(maxSalary);
            const profileData = {
                name,
                email,
                phone,
                // Optional fields are omitted entirely when left blank.
                ...(linkedin ? { linkedin } : {}),
                ...(website ? { website } : {}),
                location,
                workAuthorization: workAuth,
                yearsExperience: parseInt(yearsExp, 10) || 0,
                currentTitle,
                skills: skills.split(',').map(s => s.trim()).filter(Boolean),
                education,
                resumePath,
                summary,
                ...(hasSalaryRange
                    ? { salaryRange: { min: minSalary, max: maxSalary } }
                    : {}),
                // Any answer beginning with "y"/"Y" counts as yes.
                willingToRelocate: relocate.toLowerCase().startsWith('y'),
                needsSponsorship: sponsorship.toLowerCase().startsWith('y'),
            };
            const { mkdirSync: mk, writeFileSync: wf } = await import('fs');
            const { join: j } = await import('path');
            const { homedir: hd } = await import('os');
            const webpeelDir = j(hd(), '.webpeel');
            // `recursive: true` makes mkdirSync succeed when the directory
            // already exists, so no prior existence check is needed.
            mk(webpeelDir, { recursive: true });
            const profilePath = j(webpeelDir, 'profile.json');
            wf(profilePath, JSON.stringify(profileData, null, 2), 'utf-8');
            console.log(`\n✅ Profile saved to: ${profilePath}`);
            console.log('\nNext steps:');
            console.log(' 1. Apply to a job: webpeel jobs apply https://linkedin.com/jobs/view/...');
            console.log(' (First run opens a browser — log in to LinkedIn, then the session is saved)\n');
        }
        catch (error) {
            rl.close();
            console.error(`\nError: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
3241
|
-
// ── jobs apply-history ───────────────────────────────────────────────────────
// View application history from ~/.webpeel/applications.json
//
// Loads all recorded applications via `loadApplications` from ./core/apply.js,
// shows the most recent `--limit` of them (newest first) as JSON or as a
// table, and ends with a count of applications made today.
jobsCmd
    .command('apply-history')
    .description('View application history from ~/.webpeel/applications.json')
    .option('--json', 'Output as JSON')
    .option('--limit <n>', 'Number of recent applications to show (default: 20)', '20')
    .action(async (options) => {
        const isJson = options.json;
        // Clamp to at least 1: a negative limit would make slice(0, limit)
        // drop entries from the end instead of limiting the count.
        const limit = Math.max(parseInt(options.limit, 10) || 20, 1);
        try {
            const { loadApplications } = await import('./core/apply.js');
            const allApps = loadApplications();
            // Copy before reversing so the loaded array is not mutated.
            const apps = allApps.slice().reverse().slice(0, limit);
            if (isJson) {
                await writeStdout(JSON.stringify(apps, null, 2) + '\n');
                process.exit(0);
            }
            if (apps.length === 0) {
                console.log('\nNo applications yet. Use "webpeel jobs apply <url>" to start.\n');
                process.exit(0);
            }
            console.log(`\n📋 Application History (${apps.length} of ${allApps.length} total)\n`);
            // Column widths of the history table.
            const colDate = 22;
            const colStatus = 10;
            const colTitle = 35;
            const colCompany = 20;
            const colMode = 8;
            // Fit a cell to width `w`; missing values are rendered as empty
            // cells so one incomplete record cannot abort the listing.
            const pad = (s, w) => {
                const text = String(s ?? '');
                return text.length > w ? text.slice(0, w - 1) + '…' : text.padEnd(w);
            };
            console.log(` ${pad('Applied', colDate)} ${pad('Status', colStatus)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Mode', colMode)}`);
            console.log(` ${'-'.repeat(colDate)} ${'-'.repeat(colStatus)} ${'-'.repeat(colTitle)} ${'-'.repeat(colCompany)} ${'-'.repeat(colMode)}`);
            for (const app of apps) {
                const date = new Date(app.appliedAt).toLocaleString('en-US', {
                    month: 'short',
                    day: 'numeric',
                    year: 'numeric',
                    hour: '2-digit',
                    minute: '2-digit',
                });
                const statusEmoji = { applied: '📤', interview: '🎯', offer: '🎉', rejected: '❌', withdrawn: '🚫' }[app.status] ?? '';
                console.log(` ${pad(date, colDate)} ${pad(`${statusEmoji} ${app.status}`, colStatus)} ${pad(app.title, colTitle)} ${pad(app.company, colCompany)} ${pad(app.mode, colMode)}`);
            }
            // NOTE(review): "today" is the UTC calendar date and is matched as
            // a string prefix, which presumes `appliedAt` is an ISO-8601 UTC
            // timestamp — confirm against the code that writes the records.
            const today = new Date().toISOString().slice(0, 10);
            const todayCount = allApps
                .filter(a => typeof a.appliedAt === 'string' && a.appliedAt.startsWith(today))
                .length;
            console.log(`\n Today: ${todayCount} application(s)\n`);
            process.exit(0);
        }
        catch (error) {
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
3293
|
-
// Queue command - list active async jobs (crawl, batch)
// Requests /v1/jobs from the WebPeel API (base URL overridable through
// WEBPEEL_API_URL) with the stored API key, then prints either the raw
// response (--json) or one short record per job.
program
    .command('queue')
    .description('List active async jobs (crawl, batch)')
    .option('--json', 'Output as JSON')
    .action(async (options) => {
        try {
            const config = loadConfig();
            if (!config.apiKey) {
                console.error('Error: API key required. Run `webpeel login` first.');
                process.exit(1);
            }
            const { fetch: undiciFetch } = await import('undici');
            const endpoint = `${process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev'}/v1/jobs`;
            const requestInit = {
                headers: {
                    'Authorization': `Bearer ${config.apiKey}`,
                },
            };
            const response = await undiciFetch(endpoint, requestInit);
            if (!response.ok) {
                throw new Error(`API error: HTTP ${response.status}`);
            }
            const data = await response.json();
            // The job list is taken from `data.jobs` when present, else from `data` itself.
            const jobs = data.jobs || data;
            const hasJobs = Array.isArray(jobs) && jobs.length > 0;
            if (options.json) {
                console.log(JSON.stringify(data, null, 2));
            }
            else if (!hasJobs) {
                console.log('No active jobs.');
            }
            else {
                console.log(`Active Jobs (${jobs.length}):\n`);
                for (const job of jobs) {
                    const lines = [
                        `ID: ${job.id}`,
                        `Type: ${job.type}`,
                        `Status: ${job.status}`,
                        `URL: ${job.url}`,
                        `Created: ${job.createdAt}`,
                        '---',
                    ];
                    lines.forEach((line) => console.log(line));
                }
            }
            process.exit(0);
        }
        catch (error) {
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
3342
|
-
// Job command - get job status
// Requests /v1/jobs/<id> from the WebPeel API (base URL overridable through
// WEBPEEL_API_URL) with the stored API key and prints the job either as JSON
// (--json) or as a short summary; for crawl jobs the first five result URLs
// are listed as well.
program
    .command('job <id>')
    .description('Get status of a specific job')
    .option('--json', 'Output as JSON')
    .action(async (id, options) => {
        try {
            const config = loadConfig();
            if (!config.apiKey) {
                console.error('Error: API key required. Run `webpeel login` first.');
                process.exit(1);
            }
            const { fetch: undiciFetch } = await import('undici');
            // Encode the id so characters such as "/", "?" or "#" stay inside
            // the path segment instead of changing which endpoint is requested.
            const response = await undiciFetch(`${process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev'}/v1/jobs/${encodeURIComponent(id)}`, {
                headers: {
                    'Authorization': `Bearer ${config.apiKey}`,
                },
            });
            if (!response.ok) {
                throw new Error(`API error: HTTP ${response.status}`);
            }
            const job = await response.json();
            if (options.json) {
                console.log(JSON.stringify(job, null, 2));
            }
            else {
                console.log(`Job ID: ${job.id}`);
                console.log(`Type: ${job.type}`);
                console.log(`Status: ${job.status}`);
                console.log(`URL: ${job.url}`);
                console.log(`Created: ${job.createdAt}`);
                // The remaining fields are printed only when present.
                if (job.completedAt) {
                    console.log(`Completed: ${job.completedAt}`);
                }
                if (job.error) {
                    console.log(`Error: ${job.error}`);
                }
                if (job.results) {
                    console.log(`\nResults: ${job.results.length} items`);
                    if (job.type === 'crawl' && job.results.length > 0) {
                        console.log('\nFirst 5 URLs:');
                        for (const result of job.results.slice(0, 5)) {
                            console.log(` - ${result.url}`);
                        }
                    }
                }
            }
            process.exit(0);
        }
        catch (error) {
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
3396
|
-
// Answer command - search + fetch + LLM-generated answer
// Validates the LLM provider and API key, resolves the search provider and
// its key, calls `answerQuestion` from ./core/answer.js and prints the answer
// with its citations (or the raw result with --json). `cleanup()` is awaited
// before the process exits once the main try block has been entered.
program
    .command('answer <question>')
    .description('Ask a question, search the web, and get an AI-generated answer with citations (BYOK)')
    .option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
    .option('--search-api-key <key>', 'Search provider API key (or env WEBPEEL_BRAVE_API_KEY)')
    .option('--llm <provider>', 'LLM provider: openai, anthropic, or google (required)')
    .option('--llm-api-key <key>', 'LLM API key (or env OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)')
    .option('--llm-model <model>', 'LLM model name (optional, uses provider default)')
    .option('--max-sources <n>', 'Maximum sources to fetch (1-10, default 5)', '5')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (question, options) => {
        const spinner = options.silent ? null : ora('Thinking...').start();
        try {
            const { answerQuestion } = await import('./core/answer.js');
            const config = loadConfig();
            // Environment variable holding the key for each supported provider.
            const providerEnvKeys = {
                openai: 'OPENAI_API_KEY',
                anthropic: 'ANTHROPIC_API_KEY',
                google: 'GOOGLE_API_KEY',
            };
            const llmProvider = options.llm;
            if (!llmProvider || !['openai', 'anthropic', 'google'].includes(llmProvider)) {
                // Stop the spinner first so it does not interleave with the error text.
                if (spinner)
                    spinner.stop();
                console.error('Error: --llm is required (openai, anthropic, or google)');
                process.exit(1);
            }
            // Prefer the key belonging to the SELECTED provider, so that e.g.
            // `--llm anthropic` does not pick up OPENAI_API_KEY merely because
            // it is also set. The previous any-provider chain is kept as a
            // fallback for backward compatibility.
            const llmApiKey = options.llmApiKey
                || process.env[providerEnvKeys[llmProvider]]
                || process.env.OPENAI_API_KEY
                || process.env.ANTHROPIC_API_KEY
                || process.env.GOOGLE_API_KEY
                || '';
            if (!llmApiKey) {
                if (spinner)
                    spinner.stop();
                console.error('Error: --llm-api-key is required (or set OPENAI_API_KEY / ANTHROPIC_API_KEY / GOOGLE_API_KEY)');
                process.exit(1);
            }
            const searchProvider = (options.provider || 'duckduckgo');
            const searchApiKey = options.searchApiKey
                || process.env.WEBPEEL_BRAVE_API_KEY
                || config.braveApiKey
                || undefined;
            // Clamp to 1..10; an explicit radix keeps the parse base-10.
            const maxSources = Math.min(Math.max(parseInt(options.maxSources, 10) || 5, 1), 10);
            if (spinner)
                spinner.text = 'Searching the web...';
            const result = await answerQuestion({
                question,
                searchProvider,
                searchApiKey,
                llmProvider,
                llmApiKey,
                llmModel: options.llmModel,
                maxSources,
                stream: false,
            });
            if (spinner)
                spinner.succeed('Done');
            if (options.json) {
                const jsonStr = JSON.stringify(result, null, 2);
                // Wait for the write callback so the output is flushed before
                // process.exit() below.
                await new Promise((resolve, reject) => {
                    process.stdout.write(jsonStr + '\n', (err) => {
                        if (err)
                            reject(err);
                        else
                            resolve();
                    });
                });
            }
            else {
                console.log(`\n${result.answer}`);
                console.log(`\nSources:`);
                result.citations.forEach((c, i) => {
                    console.log(` [${i + 1}] ${c.title}`);
                    console.log(` ${c.url}`);
                });
                console.log(`\nModel: ${result.llmModel} (${result.llmProvider})`);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Answer generation failed');
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            await cleanup();
            process.exit(1);
        }
    });
|
|
3478
|
-
// Screenshot command
//
// webpeel screenshot <url> (alias: snap)
// Validates the URL and usage quota, calls takeScreenshot() from
// ./core/screenshot.js, then either prints a JSON document carrying the
// base64 payload (--json) or decodes the payload and writes it to disk.
program
    .command('screenshot <url>')
    .alias('snap')
    .description('Take a screenshot of a URL and save as PNG/JPEG')
    .option('--full-page', 'Capture full page (not just viewport)')
    // Commander calls an option parser as (value, previous). Handing it bare
    // `parseInt` would let `previous` be read as the radix, so each numeric
    // option pins base 10 explicitly, the same way --timeout already does.
    .option('--width <px>', 'Viewport width in pixels (default: 1280)', (v) => parseInt(v, 10))
    .option('--height <px>', 'Viewport height in pixels (default: 720)', (v) => parseInt(v, 10))
    .option('--format <fmt>', 'Image format: png (default) or jpeg', 'png')
    .option('--quality <n>', 'JPEG quality 1-100 (ignored for PNG)', (v) => parseInt(v, 10))
    .option('-w, --wait <ms>', 'Wait time after page load (ms)', (v) => parseInt(v, 10))
    .option('-t, --timeout <ms>', 'Request timeout (ms)', (v) => parseInt(v, 10), 30000)
    .option('--stealth', 'Use stealth mode to bypass bot detection')
    .option('--action <actions...>', 'Page actions before screenshot (e.g., "click:.btn" "wait:2000")')
    .option('--scroll-through', 'Auto-scroll page before screenshot (triggers lazy content + scroll animations)')
    .option('-o, --output <path>', 'Output file path (default: screenshot.png)')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output base64 JSON instead of binary file')
    .action(async (url, options) => {
        // Validate URL: it must parse and use http(s).
        try {
            const parsed = new URL(url);
            if (!['http:', 'https:'].includes(parsed.protocol)) {
                console.error('Error: Only HTTP and HTTPS protocols are allowed');
                process.exit(1);
            }
        }
        catch {
            console.error(`Error: Invalid URL format: ${url}`);
            process.exit(1);
        }
        // Check usage quota
        const usageCheck = await checkUsage();
        if (!usageCheck.allowed) {
            console.error(usageCheck.message);
            process.exit(1);
        }
        const spinner = options.silent ? null : ora('Taking screenshot...').start();
        try {
            // Validate format
            const format = options.format?.toLowerCase();
            if (format && !['png', 'jpeg', 'jpg'].includes(format)) {
                // Stop the spinner first so the error is not printed onto the spinner line.
                if (spinner)
                    spinner.stop();
                console.error('Error: --format must be png, jpeg, or jpg');
                process.exit(1);
            }
            // Parse actions
            let actions;
            if (options.action && options.action.length > 0) {
                try {
                    actions = parseActions(options.action);
                }
                catch (e) {
                    if (spinner)
                        spinner.stop();
                    console.error(`Error: ${e.message}`);
                    process.exit(1);
                }
            }
            const { takeScreenshot } = await import('./core/screenshot.js');
            const result = await takeScreenshot(url, {
                fullPage: options.fullPage || false,
                width: options.width,
                height: options.height,
                format: format || 'png',
                quality: options.quality,
                waitFor: options.wait,
                timeout: options.timeout,
                stealth: options.stealth || false,
                actions,
                scrollThrough: options.scrollThrough || false,
            });
            if (spinner) {
                spinner.succeed(`Screenshot taken (${result.format})`);
            }
            // Show usage footer for free/anonymous users
            if (usageCheck.usageInfo && !options.silent) {
                showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, true);
            }
            if (options.json) {
                // Output JSON with base64
                const jsonStr = JSON.stringify({
                    url: result.url,
                    format: result.format,
                    contentType: result.contentType,
                    screenshot: result.screenshot,
                }, null, 2);
                // Wait for the write callback before continuing to cleanup/exit.
                await new Promise((resolve, reject) => {
                    process.stdout.write(jsonStr + '\n', (err) => {
                        if (err)
                            reject(err);
                        else
                            resolve();
                    });
                });
            }
            else {
                // Save to file
                const ext = result.format === 'jpeg' ? 'jpg' : 'png';
                const outputPath = options.output || `screenshot.${ext}`;
                const buffer = Buffer.from(result.screenshot, 'base64');
                writeFileSync(outputPath, buffer);
                if (!options.silent) {
                    console.error(`Screenshot saved to: ${outputPath} (${(buffer.length / 1024).toFixed(1)} KB)`);
                }
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Screenshot failed');
            }
            if (error instanceof Error) {
                console.error(`\nError: ${error.message}`);
            }
            else {
                console.error('\nError: Unknown error occurred');
            }
            await cleanup();
            process.exit(1);
        }
    });
|
|
3598
|
-
// ── Top-level Apply command group ──────────────────────────────────────────
//
// Parent command for the auto-apply pipeline. Subcommands attached to it in
// this file:
//   webpeel apply submit <url>  — submit a job application
//   webpeel apply init          — interactive profile setup wizard
//   webpeel apply status        — show application stats
//   webpeel apply list          — list tracked applications (with filters)
//   webpeel apply rate          — show rate-governor status
const applyCmd = program.command('apply');
applyCmd.description('Auto-apply pipeline: submit applications, track history, manage rate limits');
|
|
3608
|
-
// apply submit <url> — auto-apply to a job posting
//
// Reads the applicant profile JSON from --profile-path, passes it to
// applyToJob() from ./core/apply.js and reports the outcome either as JSON
// (--json) or as a short human-readable summary. The process exits with 0
// only when the result has a truthy `submitted` and no `error`.
applyCmd
    .command('submit <url>')
    .description('Auto-apply to a job posting')
    .alias('s')
    .option('--profile-path <path>', 'Path to apply profile JSON', `${process.env.HOME ?? '~'}/.webpeel/profile.json`)
    .option('--browser-profile <path>', 'Path to persistent browser data dir', `${process.env.HOME ?? '~'}/.webpeel/browser-profile`)
    .option('--headed', 'Run browser visibly (default for apply)')
    .option('--headless', 'Run browser invisibly')
    .option('--confirm', 'Pause for confirmation before submit (default: true)')
    .option('--no-confirm', 'Skip confirmation, auto-submit')
    .option('--dry-run', 'Go through flow but do not submit')
    .option('--generate-cover', 'Generate tailored cover letter (needs OPENAI_API_KEY)')
    .option('--timeout <ms>', 'Timeout in ms (default: 300000)', '300000')
    .option('--json', 'Output result as JSON')
    .option('--silent', 'Silent mode')
    .action(async (url, options) => {
        const isSilent = options.silent;
        const isJson = options.json;
        // Load profile
        const profilePath = options.profilePath;
        let profile;
        try {
            const raw = readFileSync(profilePath, 'utf-8');
            profile = JSON.parse(raw);
        }
        catch {
            // Covers both an unreadable file and invalid JSON.
            const msg = `Could not load profile from ${profilePath}. Run "webpeel apply init" to create one.`;
            if (isJson) {
                await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
            }
            else {
                console.error(`Error: ${msg}`);
            }
            process.exit(1);
        }
        // Commander stores a negatable flag under its positive name, so
        // --no-confirm arrives as `options.confirm === false`; there is no
        // `options.noConfirm` property. Without the flag (or with --confirm)
        // the value is undefined/true and the run stays in review mode.
        const skipConfirmation = options.confirm === false;
        const spinner = isSilent ? null : ora('Applying...').start();
        try {
            const { applyToJob } = await import('./core/apply.js');
            const result = await applyToJob({
                url,
                profile,
                // Use sessionDir for persistent session storage (renamed from browserProfile)
                sessionDir: options.browserProfile,
                // --dry-run wins over everything; otherwise --no-confirm selects 'auto'.
                mode: (options.dryRun ? 'dry-run' : (skipConfirmation ? 'auto' : 'review')),
                // The option value is a string; fall back to 5 minutes when it does not parse.
                timeout: parseInt(options.timeout, 10) || 300_000,
            });
            if (spinner)
                spinner.stop();
            // Normalize result to a consistent output shape
            const success = result.submitted && !result.error;
            const jobTitle = result.job?.title ?? '';
            const jobCompany = result.job?.company ?? '';
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
                process.exit(success ? 0 : 1);
            }
            const icon = success ? '✅' : '❌';
            console.log(`\n${icon} ${success ? 'Application submitted!' : `Failed: ${result.error ?? 'Unknown error'}`}`);
            if (jobTitle)
                console.log(` ${jobTitle}${jobCompany ? ` @ ${jobCompany}` : ''}`);
            if (options.dryRun)
                console.log(' (Dry run — not submitted)');
            console.log(` Time: ${(result.elapsed / 1000).toFixed(1)}s\n`);
            process.exit(success ? 0 : 1);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Application failed');
            const msg = error instanceof Error ? error.message : 'Unknown error';
            if (isJson) {
                await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
            }
            else {
                console.error(`Error: ${msg}`);
            }
            process.exit(1);
        }
    });
|
|
3688
|
-
// apply init — interactive profile setup
//
// Asks a fixed series of questions on stdin, assembles the answers into a
// profile object and writes it as pretty-printed JSON to
// <home>/.webpeel/profile.json (the directory is created if needed).
applyCmd
    .command('init')
    .description('Interactive profile setup — creates ~/.webpeel/profile.json')
    .action(async () => {
        const readline = await import('readline');
        const prompt = readline.createInterface({ input: process.stdin, output: process.stdout });
        // Promise wrapper around question(); resolves with the trimmed reply.
        const ask = (text) => new Promise((done) => {
            prompt.question(text, (reply) => done(reply.trim()));
        });
        console.log('\n🤖 WebPeel Apply Setup — Create your applicant profile\n');
        console.log('This creates ~/.webpeel/profile.json used by "webpeel apply submit".\n');
        try {
            // Questions are asked strictly one after another, in this order.
            const questions = [
                ['name', 'Full name: '],
                ['email', 'Email address: '],
                ['phone', 'Phone number (optional): '],
                ['resumePath', 'Path to resume PDF (e.g. /Users/you/resume.pdf): '],
                ['currentTitle', 'Current/most recent job title: '],
                ['yearsExp', 'Years of experience: '],
                ['skills', 'Skills (comma-separated, e.g. TypeScript, React, Node.js): '],
                ['education', 'Education (e.g. B.S. Computer Science, MIT): '],
                ['location', 'City, State (e.g. San Francisco, CA): '],
                ['workAuth', 'Work authorization (e.g. US Citizen, Permanent Resident, H-1B, Need Sponsorship): '],
                ['linkedinUrl', 'LinkedIn URL (optional): '],
                ['websiteUrl', 'Portfolio/website URL (optional): '],
                ['desiredSalary', 'Desired salary (optional, e.g. $150,000): '],
            ];
            const answers = {};
            for (const [field, text] of questions) {
                answers[field] = await ask(text);
            }
            prompt.close();
            const fs = await import('fs');
            const path = await import('path');
            const os = await import('os');
            const webpeelDir = path.join(os.homedir(), '.webpeel');
            fs.mkdirSync(webpeelDir, { recursive: true });
            // Keys are added in the order they should appear in the file;
            // optional answers left empty are omitted entirely.
            const profile = { name: answers.name, email: answers.email };
            if (answers.phone) {
                profile.phone = answers.phone;
            }
            profile.resumePath = answers.resumePath;
            profile.currentTitle = answers.currentTitle;
            // A non-numeric answer is stored as 0.
            profile.yearsExperience = parseInt(answers.yearsExp, 10) || 0;
            profile.skills = answers.skills
                .split(',')
                .map((skill) => skill.trim())
                .filter(Boolean);
            profile.education = answers.education;
            profile.location = answers.location;
            profile.workAuthorization = answers.workAuth;
            if (answers.linkedinUrl) {
                profile.linkedinUrl = answers.linkedinUrl;
            }
            if (answers.websiteUrl) {
                profile.websiteUrl = answers.websiteUrl;
            }
            if (answers.desiredSalary) {
                profile.desiredSalary = answers.desiredSalary;
            }
            const profilePath = path.join(webpeelDir, 'profile.json');
            fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2), 'utf-8');
            console.log(`\n✅ Profile saved to: ${profilePath}`);
            console.log('\nNext steps:');
            console.log(' • Apply to a job: webpeel apply submit <url>');
            console.log(' • Dry run first: webpeel apply submit <url> --dry-run');
            console.log(' • View stats: webpeel apply status\n');
        }
        catch (error) {
            prompt.close();
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`\nError: ${reason}`);
            process.exit(1);
        }
    });
|
|
3747
|
-
// apply status — application stats summary
//
// Prints the object returned by ApplicationTracker#stats(), either verbatim
// as JSON (--json) or as totals followed by per-platform and per-status
// breakdowns. Empty breakdowns are not printed.
applyCmd
    .command('status')
    .description('Show application stats')
    .option('--json', 'Output as JSON')
    .action(async (options) => {
        try {
            const trackerModule = await import('./core/application-tracker.js');
            const tracker = new trackerModule.ApplicationTracker();
            const stats = tracker.stats();
            if (options.json) {
                await writeStdout(`${JSON.stringify(stats, null, 2)}\n`);
                process.exit(0);
            }
            // Prints one "label count" row per entry under the given heading.
            const printBreakdown = (heading, counts) => {
                const rows = Object.entries(counts);
                if (rows.length === 0) {
                    return;
                }
                console.log(heading);
                rows.forEach(([label, count]) => {
                    console.log(` ${label.padEnd(12)} ${count}`);
                });
            };
            console.log('\n📊 Application Stats\n');
            console.log(` Total: ${stats.total}`);
            console.log(` Today: ${stats.today}`);
            console.log(` This week: ${stats.thisWeek}`);
            printBreakdown('\n By Platform:', stats.byPlatform);
            printBreakdown('\n By Status:', stats.byStatus);
            console.log('');
            process.exit(0);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
3785
|
-
// apply list — list applications with optional filters
//
// Passes --platform / --status / --since to ApplicationTracker#list(), keeps
// at most --limit records (50 when the value does not parse), and prints them
// as JSON (--json) or as a fixed-width table.
applyCmd
    .command('list')
    .description('List tracked applications')
    .option('--platform <platform>', 'Filter by platform (e.g. linkedin, upwork)')
    .option('--status <status>', 'Filter by status (applied, interview, rejected, offer, ...)')
    .option('--since <date>', 'Filter to applications on or after this date (YYYY-MM-DD)')
    .option('--json', 'Output as JSON')
    .option('--limit <n>', 'Max records to show (default: 50)', '50')
    .action(async (options) => {
        try {
            const trackerModule = await import('./core/application-tracker.js');
            const tracker = new trackerModule.ApplicationTracker();
            const maxRows = parseInt(options.limit, 10) || 50;
            const { platform, status, since } = options;
            const matches = tracker.list({ platform, status, since });
            const records = matches.slice(0, maxRows);
            if (options.json) {
                await writeStdout(`${JSON.stringify(records, null, 2)}\n`);
                process.exit(0);
            }
            if (records.length === 0) {
                console.log('\nNo applications found.\n');
                process.exit(0);
            }
            console.log(`\n📋 Applications (${records.length})\n`);
            // Column widths, in characters.
            const widths = { date: 12, status: 10, title: 35, company: 20 };
            // Pads short values; truncates long ones and marks the cut with an ellipsis.
            const fit = (text, width) => {
                if (text.length > width) {
                    return text.slice(0, width - 1) + '…';
                }
                return text.padEnd(width);
            };
            console.log(` ${'Date'.padEnd(widths.date)} ${'Status'.padEnd(widths.status)} ${'Title'.padEnd(widths.title)} ${'Company'.padEnd(widths.company)}`);
            console.log(` ${'-'.repeat(widths.date)} ${'-'.repeat(widths.status)} ${'-'.repeat(widths.title)} ${'-'.repeat(widths.company)}`);
            records.forEach((record) => {
                // Only the first 10 characters of appliedAt are shown.
                const day = record.appliedAt.slice(0, 10);
                console.log(` ${fit(day, widths.date)} ${fit(record.status, widths.status)} ${fit(record.title, widths.title)} ${fit(record.company, widths.company)}`);
            });
            console.log('');
            process.exit(0);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
3832
|
-
// apply rate — rate governor status
//
// With --reset-cooldown, clears the cooldown on a RateGovernor and exits.
// Otherwise reports the governor's state, config and canApply() verdict,
// as JSON (--json) or as a human-readable summary.
applyCmd
    .command('rate')
    .description('Show rate governor status (daily limits, cooldown, next allowed time)')
    .option('--json', 'Output as JSON')
    .option('--reset-cooldown', 'Clear any active cooldown (manual override)')
    .action(async (options) => {
        try {
            const governorModule = await import('./core/rate-governor.js');
            const { formatDuration } = governorModule;
            const governor = new governorModule.RateGovernor();
            if (options.resetCooldown) {
                governor.resetCooldown();
                console.log('✅ Cooldown cleared.');
                process.exit(0);
            }
            const state = governor.getState();
            const limits = governor.getConfig();
            const verdict = governor.canApply();
            if (options.json) {
                const payload = {
                    state,
                    config: limits,
                    canApply: verdict.allowed,
                    reason: verdict.reason,
                    waitMs: verdict.waitMs,
                    nextDelayMs: governor.getNextDelay(),
                };
                await writeStdout(`${JSON.stringify(payload, null, 2)}\n`);
                process.exit(0);
            }
            const blocked = !verdict.allowed;
            console.log('\n⏱ Rate Governor Status\n');
            console.log(` Today's applications: ${state.todayCount} / ${limits.maxPerDay}`);
            console.log(` Total applications: ${state.totalApplications}`);
            console.log(` Can apply now: ${blocked ? '❌ No' : '✅ Yes'}`);
            if (blocked && verdict.reason) {
                console.log(` Reason: ${verdict.reason}`);
            }
            if (blocked && verdict.waitMs) {
                console.log(` Wait time: ${formatDuration(verdict.waitMs)}`);
            }
            if (state.cooldownUntil > 0) {
                // Clamp at zero so a cooldown timestamp in the past never shows a negative duration.
                const msLeft = Math.max(0, state.cooldownUntil - Date.now());
                console.log(` Cooldown: Active (${formatDuration(msLeft)} remaining)`);
            }
            console.log(` Min delay: ${formatDuration(limits.minDelayMs)}`);
            console.log(` Max delay: ${formatDuration(limits.maxDelayMs)}`);
            console.log(` Active hours: ${limits.activeHours[0]}:00 – ${limits.activeHours[1]}:00`);
            console.log(` Weekdays only: ${limits.weekdaysOnly ? 'Yes' : 'No'}`);
            console.log('');
            process.exit(0);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
3887
|
-
// ============================================================
// Profile management commands
//
// Parent command for the `profile` subcommands (create / list / show /
// delete) that are attached to it below.
// ============================================================
const profileCmd = program.command('profile');
profileCmd.description('Manage named browser profiles (saved login sessions)');
|
|
3893
|
-
// profile create <name> — delegates to createProfile() and exits with 0 on
// success, or prints the error and exits with 1.
profileCmd
    .command('create <name>')
    .description('Create a new profile interactively (launches browser, log in, press Ctrl+C when done)')
    .option('--description <text>', 'Optional description for this profile')
    .action(async (name, opts) => {
        let exitCode = 0;
        try {
            await createProfile(name, opts.description);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            console.error(`Error: ${reason}`);
            exitCode = 1;
        }
        process.exit(exitCode);
    });
|
|
3907
|
-
// profile list — prints every profile returned by listProfiles() as an
// aligned table, or a short getting-started hint when there are none.
profileCmd
    .command('list')
    .description('List all saved browser profiles')
    .action(() => {
        const profiles = listProfiles();
        if (profiles.length === 0) {
            const hint = [
                'No profiles found.',
                '',
                'Create one with:',
                ' webpeel profile create <name>',
                '',
                'Then use it with:',
                ' webpeel <url> --profile <name>',
            ];
            hint.forEach((line) => console.log(line));
            process.exit(0);
        }
        console.log('');
        console.log('Saved profiles:');
        console.log('');
        // Column widths: wide enough for the longest value, with a fixed minimum.
        const nameW = profiles.reduce((widest, p) => Math.max(widest, p.name.length), 8);
        const domainsW = profiles.reduce((widest, p) => Math.max(widest, (p.domains.join(', ') || '(none)').length), 10);
        // Lays out one table line; the final column is left unpadded.
        const formatRow = (nameCol, domainsCol, lastUsedCol, createdCol) => `${nameCol.padEnd(nameW)} ${domainsCol.padEnd(domainsW)} ${lastUsedCol.padEnd(12)} ${createdCol}`;
        const header = formatRow('Name', 'Domains', 'Last Used', 'Created');
        console.log(header);
        console.log('─'.repeat(header.length + 4));
        profiles.forEach((p) => {
            const domainsStr = p.domains.length > 0 ? p.domains.join(', ') : '(none)';
            const lastUsed = formatRelativeTime(new Date(p.lastUsed));
            // Date part (YYYY-MM-DD) of the ISO timestamp.
            const created = new Date(p.created).toISOString().split('T')[0];
            console.log(formatRow(p.name, domainsStr, lastUsed, created));
        });
        console.log('');
        process.exit(0);
    });
|
|
3946
|
-
// profile show <name> — resolves the profile directory with getProfilePath()
// and prints the fields stored in its metadata.json.
profileCmd
    .command('show <name>')
    .description('Show details for a profile')
    .action((name) => {
        const profilePath = getProfilePath(name);
        if (!profilePath) {
            console.error(`Error: Profile "${name}" not found.`);
            console.error('Run "webpeel profile list" to see available profiles.');
            process.exit(1);
        }
        // Renders a stored timestamp in the user's locale.
        const asLocal = (stamp) => new Date(stamp).toLocaleString();
        try {
            const metadataFile = `${profilePath}/metadata.json`;
            const meta = JSON.parse(readFileSync(metadataFile, 'utf-8'));
            console.log('');
            console.log(`Profile: ${meta.name}`);
            // The description line appears only when one was stored.
            if (meta.description) {
                console.log(`Description: ${meta.description}`);
            }
            console.log(`Created: ${asLocal(meta.created)}`);
            console.log(`Last used: ${asLocal(meta.lastUsed)}`);
            const domainList = meta.domains.length > 0 ? meta.domains.join(', ') : '(none)';
            console.log(`Domains: ${domainList}`);
            console.log(`Directory: ${profilePath}`);
            console.log('');
            process.exit(0);
        }
        catch (e) {
            const reason = e instanceof Error ? e.message : String(e);
            console.error(`Error reading profile: ${reason}`);
            process.exit(1);
        }
    });
|
|
3974
|
-
// profile delete <name> — calls deleteProfile(); a falsy return value is
// reported as "not found" and exits with 1.
profileCmd
    .command('delete <name>')
    .description('Delete a saved profile')
    .action((name) => {
        if (!deleteProfile(name)) {
            console.error(`Error: Profile "${name}" not found.`);
            console.error('Run "webpeel profile list" to see available profiles.');
            process.exit(1);
        }
        else {
            console.log(`Profile "${name}" deleted.`);
            process.exit(0);
        }
    });
|
|
3989
|
-
// ── Hotels command ─────────────────────────────────────────────────────────────
//
// webpeel hotels <destination>
// Resolves check-in/check-out dates, runs searchHotels() from
// ./core/hotel-search.js and prints the result as JSON (--json) or as a
// table preceded by a per-source status line.
program
    .command('hotels <destination>')
    .description('Search multiple travel sites for hotels (Kayak, Booking.com, Google Travel)')
    .option('--checkin <date>', 'Check-in date (ISO or relative, e.g. "tomorrow", "2026-02-20"). Default: tomorrow')
    .option('--checkout <date>', 'Check-out date (ISO or relative). Default: checkin + 1 day')
    .option('--sort <method>', 'Sort by: price, rating, value (default: price)', 'price')
    .option('--limit <n>', 'Max results (default: 20)', '20')
    .option('--source <name...>', 'Only use specific source(s): kayak, booking, google (repeatable)')
    .option('--json', 'Output as JSON')
    .option('--stealth', 'Use stealth mode for all sources')
    .option('--proxy <url>', 'Proxy URL for requests (http://host:port, socks5://user:pass@host:port)')
    .option('-s, --silent', 'Suppress progress messages')
    .action(async (destination, options) => {
        const isJson = options.json;
        const isSilent = options.silent;
        // One dynamic import supplies both the date helpers and the search itself.
        const { parseDate, addDays: hotelAddDays, searchHotels } = await import('./core/hotel-search.js');
        // Build checkin/checkout
        let checkinStr;
        let checkoutStr;
        try {
            checkinStr = parseDate(options.checkin ?? 'tomorrow');
            // Without --checkout the stay defaults to one night.
            checkoutStr = options.checkout
                ? parseDate(options.checkout)
                : hotelAddDays(checkinStr, 1);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            if (isJson) {
                await writeStdout(JSON.stringify({ success: false, error: { type: 'invalid_request', message: msg } }) + '\n');
            }
            else {
                console.error(`Error: ${msg}`);
            }
            process.exit(1);
        }
        // Unrecognized --sort values fall back to 'price'.
        const sortMethod = (['price', 'rating', 'value'].includes(options.sort)
            ? options.sort
            : 'price');
        // At least one result; a value that does not parse becomes 20.
        const limit = Math.max(1, parseInt(options.limit, 10) || 20);
        const sources = options.source
            ? (Array.isArray(options.source) ? options.source : [options.source])
            : undefined;
        // Progress spinner only for interactive (non-silent, non-JSON) runs.
        let searchSpinner = null;
        if (!isSilent && !isJson) {
            searchSpinner = ora(`Searching hotels in ${destination}...`).start();
        }
        try {
            const result = await searchHotels({
                destination,
                checkin: checkinStr,
                checkout: checkoutStr,
                sort: sortMethod,
                limit,
                sources,
                stealth: options.stealth,
                silent: isSilent,
                proxy: options.proxy,
            });
            if (searchSpinner)
                searchSpinner.stop();
            // Show per-source status
            if (!isSilent && !isJson) {
                for (const src of result.sources) {
                    if (src.status === 'ok') {
                        console.error(`✅ ${src.name}: ${src.count} hotels found`);
                    }
                    else {
                        console.error(`❌ ${src.name}: ${src.status}${src.error ? ' — ' + src.error : ''}`);
                    }
                }
            }
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
                await cleanup();
                process.exit(0);
            }
            // Human-readable table output.
            // Noon UTC plus a UTC time zone keeps the printed day equal to the ISO date.
            const fmtDate = (iso) => {
                const d = new Date(iso + 'T12:00:00Z');
                return d.toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric', timeZone: 'UTC' });
            };
            const ci = fmtDate(result.checkin);
            const co = fmtDate(result.checkout);
            console.log(`\n🏨 Hotels in ${result.destination}`);
            console.log(` ${ci} → ${co} | Sorted by ${sortMethod}\n`);
            if (result.results.length === 0) {
                console.log(' No hotels found.\n');
            }
            else {
                const colNum = 3;
                const colName = 42;
                const colPrice = 8;
                const colRating = 8;
                const colSource = 10;
                // Truncates with an ellipsis when too long, otherwise right-pads.
                const padEnd = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
                const padStart = (s, w) => s.padStart(w);
                console.log(` ${padStart('#', colNum)} ${padEnd('Hotel', colName)} ${padEnd('Price', colPrice)} ${padEnd('Rating', colRating)} ${padEnd('Source', colSource)}`);
                result.results.forEach((hotel, i) => {
                    const priceStr = hotel.priceDisplay || '—';
                    // `!= null` also catches undefined, so a missing rating shows as a dash.
                    const ratingStr = hotel.rating != null ? String(hotel.rating) : '—';
                    console.log(` ${padStart(String(i + 1), colNum)} ${padEnd(hotel.name, colName)} ${padEnd(priceStr, colPrice)} ${padEnd(ratingStr, colRating)} ${padEnd(hotel.source, colSource)}`);
                });
                console.log('');
                const sourceSummary = result.sources
                    .map(s => `${s.name} (${s.count} ${s.status === 'ok' ? '✅' : s.status === 'blocked' ? '🚫' : '❌'})`)
                    .join(' | ');
                console.log(`Sources: ${sourceSummary}`);
            }
            console.log('');
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (searchSpinner)
                searchSpinner.fail('Hotel search failed');
            const msg = error instanceof Error ? error.message : 'Unknown error';
            if (isJson) {
                await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
            }
            else {
                console.error(`\nError: ${msg}`);
            }
            await cleanup();
            process.exit(1);
        }
    });
|
|
4125
|
-
// ============================================================
// answer command — LLM-free web Q&A (search + fetch + BM25)
// ============================================================
// Calls the hosted `/v1/ask` endpoint and prints the answer it returns.
// Exit codes: 0 on success, 1 on request/server failure, 2 on missing or
// rejected API key.
program
    .command('webask <question>')
    .alias('ask-web')
    .description('Search the web and get a direct answer (no LLM key required)')
    .option('-n, --sources <n>', 'Number of sources to check (1-5, default 3)', '3')
    .option('--json', 'Output as JSON')
    .option('-s, --silent', 'Silent mode')
    .action(async (question, options) => {
    const isJson = !!options.json;
    const isSilent = !!options.silent;
    // Clamp to the advertised 1-5 range; non-numeric input falls back to 3.
    const numSources = Math.min(Math.max(Number.parseInt(options.sources, 10) || 3, 1), 5);
    const askCfg = loadConfig();
    const askApiKey = askCfg.apiKey || process.env.WEBPEEL_API_KEY;
    const askApiUrl = process.env.WEBPEEL_API_URL || 'https://api.webpeel.dev';
    if (!askApiKey) {
        console.error('No API key configured. Run: webpeel auth <your-key>');
        console.error('Get a free key at: https://app.webpeel.dev/keys');
        process.exit(2);
    }
    let spinner = null;
    // Report a failure on whichever channel is active, then exit.
    // The spinner only exists in interactive mode, so without this helper
    // --json and --silent runs would exit non-zero with no explanation.
    const fail = async (message, exitCode) => {
        if (spinner) {
            spinner.fail(message);
        }
        else if (isJson) {
            await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message } }) + '\n');
        }
        else {
            console.error(`Error: ${message}`);
        }
        process.exit(exitCode);
    };
    if (!isSilent && !isJson) {
        const { default: ora } = await import('ora');
        spinner = ora(`Searching for: ${question}`).start();
    }
    try {
        const params = new URLSearchParams({ q: question, sources: String(numSources) });
        const res = await fetch(`${askApiUrl}/v1/ask?${params}`, {
            headers: { Authorization: `Bearer ${askApiKey}` },
            signal: AbortSignal.timeout(60000),
        });
        if (res.status === 401) {
            await fail('API key invalid or expired. Run: webpeel auth <new-key>', 2);
        }
        if (res.status === 404) {
            await fail('Ask endpoint not available on this server version', 1);
        }
        if (!res.ok) {
            // Best-effort body read: an unreadable body still yields a status-only message.
            const body = await res.text().catch(() => '');
            await fail(`API error ${res.status}: ${body.slice(0, 100)}`, 1);
        }
        const data = await res.json();
        if (spinner) {
            if (data.answer) {
                spinner.succeed(`Found answer (confidence: ${Math.round((data.confidence || 0) * 100)}%)`);
            }
            else {
                spinner.warn('No confident answer found');
            }
        }
        if (isJson) {
            console.log(JSON.stringify(data, null, 2));
        }
        else {
            if (data.answer) {
                console.log('\n' + data.answer);
                if (data.sources?.length && !isSilent) {
                    console.log('\nSources:');
                    data.sources.slice(0, 3).forEach((s) => console.log(` • ${s.title || s.url} — ${s.url}`));
                }
            }
            else {
                console.log('\nNo confident answer found for:', question);
            }
            if (data.elapsed && !isSilent)
                console.log(`\n⚡ ${data.elapsed}ms`);
        }
    }
    catch (err) {
        // Network errors, timeouts and JSON parse failures all land here.
        await fail(err instanceof Error ? err.message : String(err), 1);
    }
});
|
|
4207
|
-
// ============================================================
// research command — autonomous multi-step web research
// ============================================================
// Delegates to `research()` from ./core/research.js, streaming progress to
// stderr and writing the report (or the full JSON result) to stdout.
program
    .command('research <query>')
    .description('Conduct autonomous multi-step web research on a topic and synthesize a report')
    .option('--max-sources <n>', 'Maximum sources to consult (default: 5)', '5')
    .option('--max-depth <n>', 'Link-following depth (default: 1)', '1')
    .option('--format <f>', 'Output format: report (default) or sources', 'report')
    .option('--llm-key <key>', 'LLM API key for synthesis (or env OPENAI_API_KEY)')
    .option('--llm-model <model>', 'LLM model for synthesis (default: gpt-4o-mini)')
    .option('--llm-base-url <url>', 'LLM API base URL (default: https://api.openai.com/v1)')
    // Help text previously claimed 40000 while the effective default was 60000.
    .option('--timeout <ms>', 'Max research time in ms (default: 60000)', '60000')
    .option('--json', 'Output result as JSON')
    .option('-s, --silent', 'Suppress progress output')
    .action(async (query, options) => {
    const isSilent = !!options.silent;
    const isJson = !!options.json;
    // Non-numeric (or zero) values fall back to the documented defaults.
    // NOTE(review): `|| 1` means a depth of 0 cannot be requested — confirm that is intended.
    const maxSources = Number.parseInt(options.maxSources, 10) || 5;
    const maxDepth = Number.parseInt(options.maxDepth, 10) || 1;
    const timeout = Number.parseInt(options.timeout, 10) || 60000;
    // Anything other than 'sources' is treated as 'report'.
    const outputFormat = options.format === 'sources' ? 'sources' : 'report';
    const apiKey = options.llmKey || process.env.OPENAI_API_KEY;
    const model = options.llmModel;
    const baseUrl = options.llmBaseUrl;
    const phaseIcons = {
        searching: '🔍',
        fetching: '📄',
        extracting: '🧠',
        following: '🔗',
        synthesizing: '✍️',
    };
    try {
        const { research } = await import('./core/research.js');
        const result = await research({
            query,
            maxSources,
            maxDepth,
            timeout,
            outputFormat,
            apiKey,
            model,
            baseUrl,
            onProgress: (step) => {
                // Progress goes to stderr so stdout stays clean for piping.
                if (isSilent || isJson)
                    return;
                const icon = phaseIcons[step.phase] ?? '⚙️';
                let extra = '';
                if (step.sourcesFound !== undefined) {
                    extra = ` (found ${step.sourcesFound})`;
                }
                else if (step.sourcesFetched !== undefined) {
                    extra = ` (${step.sourcesFetched} fetched)`;
                }
                process.stderr.write(`${icon} ${step.message}${extra}...\n`);
            },
        });
        if (isJson) {
            await writeStdout(JSON.stringify(result, null, 2) + '\n');
        }
        else {
            await writeStdout(result.report + '\n');
            if (!isSilent) {
                const elapsed = (result.elapsed / 1000).toFixed(1);
                const cost = result.cost !== undefined ? ` | cost: $${result.cost.toFixed(4)}` : '';
                process.stderr.write(`\n📊 ${result.sourcesConsulted} sources consulted (${result.totalSourcesFound} found) | ${elapsed}s${cost}\n`);
            }
        }
        await cleanup();
        process.exit(0);
    }
    catch (error) {
        const msg = error instanceof Error ? error.message : 'Unknown error';
        if (isJson) {
            await writeStdout(JSON.stringify({ success: false, error: { type: 'fetch_failed', message: msg } }) + '\n');
        }
        else {
            console.error(`\nError: ${msg}`);
        }
        await cleanup();
        process.exit(1);
    }
});
|
|
4288
|
-
// Schema templates listing command: prints each entry of SCHEMA_TEMPLATES
// with its description and field names, followed by usage examples.
program
    .command('schemas')
    .description('List available extraction schema templates')
    .action(() => {
    const indent = ''.padEnd(12);
    console.log('\nAvailable schema templates:\n');
    Object.entries(SCHEMA_TEMPLATES).forEach(([templateName, spec]) => {
        const fieldNames = Object.keys(spec.fields).join(', ');
        console.log(` ${templateName.padEnd(12)} ${spec.description}`);
        console.log(` ${indent} Fields: ${fieldNames}`);
        console.log('');
    });
    console.log('Usage: webpeel "https://example.com" --schema product');
    console.log(' webpeel "https://example.com" --schema \'{"field":"description"}\'');
});
|
|
4302
|
-
// ── design-compare command ─────────────────────────────────────────────────────
//
// webpeel design-compare "https://subject.com" --ref "https://reference.com"
//
// Validates both URLs, runs `takeDesignComparison` from ./core/screenshot.js
// and emits the comparison as JSON (stdout and/or --output file) or as a
// human-readable summary.
program
    .command('design-compare <url>')
    .description('Compare the design of a subject URL against a reference URL')
    .option('--ref <url>', 'Reference URL to compare against (required)')
    // Commander calls option processors as (value, previous). Passing bare
    // `parseInt` would use `previous` as the radix when a flag is repeated,
    // so wrap it and pin base 10.
    .option('--width <px>', 'Viewport width in pixels (default: 1440)', (value) => Number.parseInt(value, 10))
    .option('--height <px>', 'Viewport height in pixels (default: 900)', (value) => Number.parseInt(value, 10))
    .option('-o, --output <path>', 'Save comparison report to a JSON file')
    .option('-s, --silent', 'Silent mode (no spinner)')
    .option('--json', 'Output comparison as JSON to stdout')
    .action(async (url, options) => {
    // Validate subject URL
    try {
        const parsed = new URL(url);
        if (!['http:', 'https:'].includes(parsed.protocol)) {
            console.error('Error: Only HTTP and HTTPS protocols are allowed');
            process.exit(1);
        }
    }
    catch {
        console.error(`Error: Invalid URL format: ${url}`);
        process.exit(1);
    }
    // Validate --ref
    if (!options.ref) {
        console.error('Error: --ref <url> is required');
        process.exit(1);
    }
    try {
        const parsedRef = new URL(options.ref);
        if (!['http:', 'https:'].includes(parsedRef.protocol)) {
            console.error('Error: --ref must be an HTTP or HTTPS URL');
            process.exit(1);
        }
    }
    catch {
        console.error(`Error: Invalid --ref URL format: ${options.ref}`);
        process.exit(1);
    }
    // Reject non-numeric / non-positive viewport sizes instead of passing NaN on.
    for (const dimension of ['width', 'height']) {
        const value = options[dimension];
        if (value !== undefined && !(Number.isInteger(value) && value > 0)) {
            console.error(`Error: --${dimension} must be a positive integer`);
            process.exit(1);
        }
    }
    const ora = (await import('ora')).default;
    const spinner = options.silent ? null : ora(`Comparing designs: ${url} vs ${options.ref}...`).start();
    try {
        const { takeDesignComparison } = await import('./core/screenshot.js');
        const result = await takeDesignComparison(url, options.ref, {
            width: options.width,
            height: options.height,
        });
        if (spinner)
            spinner.succeed('Design comparison complete');
        const { comparison } = result;
        const output = {
            subjectUrl: result.subjectUrl,
            referenceUrl: result.referenceUrl,
            score: comparison.score,
            summary: comparison.summary,
            gaps: comparison.gaps,
            subjectAnalysis: comparison.subjectAnalysis,
            referenceAnalysis: comparison.referenceAnalysis,
        };
        if (options.output) {
            const { writeFileSync } = await import('fs');
            writeFileSync(options.output, JSON.stringify(output, null, 2));
            // Status goes to stderr so it never mixes with JSON on stdout.
            if (!options.silent)
                console.error(`Report saved to: ${options.output}`);
        }
        if (options.json || !options.output) {
            // JSON to stdout: requested explicitly, or the default when no file was given.
            const jsonStr = JSON.stringify(output, null, 2);
            await new Promise((resolve, reject) => {
                process.stdout.write(jsonStr + '\n', (err) => {
                    if (err)
                        reject(err);
                    else
                        resolve();
                });
            });
        }
        else if (!options.silent) {
            // Human-readable summary (only when the report went to a file and --json was not set)
            console.log(`\n🎨 Design Comparison`);
            console.log(`Subject: ${result.subjectUrl}`);
            console.log(`Reference: ${result.referenceUrl}`);
            console.log(`Score: ${comparison.score}/10`);
            console.log(`\n${comparison.summary}`);
            if (comparison.gaps.length > 0) {
                console.log(`\nGaps (${comparison.gaps.length}):`);
                for (const gap of comparison.gaps) {
                    const sev = gap.severity === 'high' ? '🔴' : gap.severity === 'medium' ? '🟡' : '🟢';
                    console.log(` ${sev} ${gap.property}: ${gap.description}`);
                    console.log(` Subject: ${gap.subject}`);
                    console.log(` Reference: ${gap.reference}`);
                    console.log(` Suggestion: ${gap.suggestion}`);
                }
            }
        }
    }
    catch (error) {
        if (spinner)
            spinner.fail('Design comparison failed');
        // Thrown values are not guaranteed to be Error instances.
        console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
        process.exit(1);
    }
});
|
|
56
|
+
// ── Update check (non-blocking, background) ───────────────────────────────────
// `void` discards the returned value on purpose: the check is started but
// never awaited, so it cannot delay command parsing.
void checkForUpdates();
// ── Register all command groups ───────────────────────────────────────────────
// Each register* function receives the shared `program` instance.
// NOTE(review): presumably these attach the sub-commands that now live under
// cli/commands/*.js — confirm against those modules.
registerFetchCommands(program);
registerSearchCommands(program);
registerInteractCommands(program);
registerAuthCommands(program);
registerScreenshotCommands(program);
registerJobsCommands(program);
// ── Parse ─────────────────────────────────────────────────────────────────────
// Must run after every command has been registered.
program.parse();
|
|
4407
|
-
// ============================================================
// Time formatting helper
// ============================================================
/**
 * Format a past Date relative to now (e.g. "2h ago", "5m ago").
 *
 * Each unit is rounded from the previous one (seconds → minutes → hours →
 * days), and the largest unit whose predecessor overflowed is used.
 * Timestamps in the future (e.g. from clock skew) are clamped to "0s ago"
 * instead of producing a negative duration.
 *
 * @param {Date} past - The moment to describe.
 * @returns {string} A compact label such as "42s ago", "5m ago", "3h ago" or "2d ago".
 */
function formatRelativeTime(past) {
    const diffMs = Date.now() - past.getTime();
    // Clamp so a timestamp slightly ahead of the local clock never yields "-5s ago".
    const diffSec = Math.max(0, Math.round(diffMs / 1000));
    if (diffSec < 60)
        return `${diffSec}s ago`;
    const diffMin = Math.round(diffSec / 60);
    if (diffMin < 60)
        return `${diffMin}m ago`;
    const diffHr = Math.round(diffMin / 60);
    if (diffHr < 24)
        return `${diffHr}h ago`;
    const diffDay = Math.round(diffHr / 24);
    return `${diffDay}d ago`;
}
|
|
4427
|
-
// ============================================================
// Error classification for JSON error output (#6)
// ============================================================
/**
 * Map a thrown value to a coarse error code for JSON error output.
 *
 * Precedence: an explicit `_code` property wins, then the error name and
 * message are matched against timeout, blocked, DNS and invalid-URL
 * patterns in that order. Anything else, including non-Error values, is
 * 'FETCH_FAILED'.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} The `_code` value, or one of 'TIMEOUT', 'BLOCKED',
 *   'DNS_FAILED', 'INVALID_URL', 'FETCH_FAILED'.
 */
function classifyErrorCode(error) {
    if (!(error instanceof Error))
        return 'FETCH_FAILED';
    // Check for our custom _code first (set in pre-fetch validation)
    if (error._code)
        return error._code;
    // `message` can be overwritten with a non-string; coerce instead of throwing.
    const msg = String(error.message ?? '').toLowerCase();
    const name = error.name || '';
    const mentions = (...needles) => needles.some((needle) => msg.includes(needle));
    if (name === 'TimeoutError' || mentions('timeout', 'timed out')) {
        return 'TIMEOUT';
    }
    if (name === 'BlockedError' || mentions('blocked', '403', 'cloudflare')) {
        return 'BLOCKED';
    }
    // NOTE(review): the bare 'not found' match also catches HTTP 404-style
    // messages and reports them as DNS failures — confirm that is intended.
    if (mentions('enotfound', 'getaddrinfo', 'dns resolution failed', 'not found')) {
        return 'DNS_FAILED';
    }
    if (mentions('invalid url', 'invalid hostname', 'only http')) {
        return 'INVALID_URL';
    }
    return 'FETCH_FAILED';
}
|
|
4452
|
-
/**
 * Build a unified PeelEnvelope from a PeelResult.
 *
 * All existing PeelResult fields are spread first (backward compatibility),
 * then canonical envelope fields override/extend them.
 *
 * @param {object} result - Fetch result; `url`, `content`, `title`,
 *   `metadata`, `tokens` and `elapsed` are read explicitly.
 * @param {object} [extra] - Optional flags: `cached`, `structured`,
 *   `truncated`, `totalAvailable`. Defaults to `{}` so callers may omit it.
 * @returns {object} The envelope. `status` is always 200 and `cached`
 *   defaults to false.
 */
function buildEnvelope(result, extra = {}) {
    const envelope = {
        // Spread all PeelResult fields for backward compatibility
        ...result,
        // Required envelope fields (override PeelResult where they overlap)
        url: result.url,
        status: 200,
        content: result.content,
        metadata: {
            // result.metadata is spread last, so its own title (if any) wins.
            title: result.title,
            ...result.metadata,
        },
        tokens: result.tokens,
        cached: extra.cached ?? false,
        elapsed: result.elapsed,
    };
    // Optional envelope fields — only include when meaningful
    if (extra.structured !== undefined)
        envelope.structured = extra.structured;
    if (extra.truncated)
        envelope.truncated = true;
    if (extra.totalAvailable !== undefined)
        envelope.totalAvailable = extra.totalAvailable;
    return envelope;
}
|
|
4483
|
-
// ============================================================
// Shared output helper
// ============================================================
/**
 * Print a fetch result in the shape selected by the CLI flags.
 *
 * Modes are checked in order and the first match wins: `--links`,
 * `--images`, `--meta`, then full output (JSON when `--json`, otherwise the
 * content with an optional terminal header). Content goes to stdout; the
 * timing footer goes to stderr so piped output stays clean.
 *
 * @param {object} result - Fetch result (url, title, content, links, metadata, …).
 * @param {object} options - Parsed CLI flags (`links`, `images`, `meta`, `json`, `silent`).
 * @param {object} [extra] - Optional `cached`, `truncated`, `totalAvailable` flags.
 * @returns {Promise<void>}
 */
async function outputResult(result, options, extra = {}) {
    // A result without a links array is treated as having none; previously
    // --links/--images crashed (or printed "undefined") in that case.
    const links = result.links ?? [];
    // --links: output only links
    if (options.links) {
        if (options.json) {
            await writeStdout(JSON.stringify(links, null, 2) + '\n');
        }
        else {
            for (const link of links) {
                await writeStdout(link + '\n');
            }
        }
        return;
    }
    // --images: output only image URLs
    if (options.images) {
        // Extract image URLs from links that point to images.
        // NOTE(review): this is a substring match anywhere in the URL, so a
        // host such as "example.icon.org" also matches '.ico'.
        const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp', '.ico'];
        const imageUrls = links.filter((link) => {
            const urlLower = link.toLowerCase();
            return imageExtensions.some((ext) => urlLower.includes(ext));
        });
        if (options.json) {
            await writeStdout(JSON.stringify(imageUrls, null, 2) + '\n');
        }
        else {
            for (const imageUrl of imageUrls) {
                await writeStdout(imageUrl + '\n');
            }
        }
        return;
    }
    // --meta: output only metadata
    if (options.meta) {
        const meta = {
            url: result.url,
            title: result.title,
            method: result.method,
            elapsed: result.elapsed,
            tokens: result.tokens,
            cached: extra.cached ?? false,
            // Spread last: page metadata may override the fields above.
            ...result.metadata,
        };
        if (options.json) {
            await writeStdout(JSON.stringify(meta, null, 2) + '\n');
        }
        else {
            console.log(`Title: ${meta.title || '(none)'}`);
            console.log(`URL: ${meta.url}`);
            if (meta.description)
                console.log(`Description: ${meta.description}`);
            if (meta.author)
                console.log(`Author: ${meta.author}`);
            if (meta.published)
                console.log(`Published: ${meta.published}`);
            if (meta.canonical)
                console.log(`Canonical: ${meta.canonical}`);
            if (meta.image)
                console.log(`OG Image: ${meta.image}`);
            console.log(`Method: ${meta.method}`);
            console.log(`Elapsed: ${meta.elapsed}ms`);
            console.log(`Tokens: ${meta.tokens}`);
            console.log(`Cached: ${meta.cached}`);
        }
        return;
    }
    // Default: full output
    if (options.json) {
        // Build clean JSON output with guaranteed top-level fields
        const output = {
            url: result.url,
            title: result.metadata?.title || result.title || null,
            tokens: result.tokens || 0,
            fetchedAt: new Date().toISOString(),
            method: result.method || 'simple',
            elapsed: result.elapsed,
            content: result.content,
        };
        // Add optional fields only if present (filter out undefined/null values from metadata)
        if (result.metadata) {
            const cleanMeta = {};
            for (const [k, v] of Object.entries(result.metadata)) {
                if (v !== undefined && v !== null)
                    cleanMeta[k] = v;
            }
            if (Object.keys(cleanMeta).length > 0)
                output.metadata = cleanMeta;
        }
        if (result.links?.length)
            output.links = result.links;
        if (result.images?.length)
            output.images = result.images;
        // Copied only when truthy; array order fixes the key order in the JSON.
        const passthroughFields = [
            'structured', 'domainData', 'readability', 'quickAnswer', 'quality',
            'contentType', 'chunks', 'totalChunks', 'warning', 'focusQuery',
            'focusReduction', 'extracted',
        ];
        for (const field of passthroughFields) {
            if (result[field])
                output[field] = result[field];
        }
        if (extra.cached)
            output.cached = true;
        if (extra.truncated)
            output.truncated = true;
        if (extra.totalAvailable !== undefined)
            output.totalAvailable = extra.totalAvailable;
        output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing, serverMarkdown: result.serverMarkdown || false };
        await writeStdout(JSON.stringify(output, null, 2) + '\n');
    }
    else {
        const totalMs = result.timing?.total ?? result.elapsed;
        // Smart terminal header (interactive mode only)
        const isTerminalOutput = process.stdout.isTTY && !options.silent;
        if (isTerminalOutput) {
            const meta = result.metadata || {};
            const parts = [];
            if (meta.title || result.title)
                parts.push(`\x1b[1m${meta.title || result.title}\x1b[0m`);
            if (meta.author)
                parts.push(`By ${meta.author}`);
            if (meta.wordCount)
                parts.push(`${meta.wordCount} words`);
            if (totalMs)
                parts.push(`${totalMs}ms`);
            if (parts.length > 0) {
                await writeStdout(`\n ${parts.join(' · ')}\n`);
                await writeStdout(' ' + '─'.repeat(60) + '\n\n');
            }
        }
        // Stream content immediately to stdout — consumer gets it without waiting
        await writeStdout(result.content + '\n');
        // Append timing summary to stderr so it doesn't pollute piped content
        if (!options.silent) {
            process.stderr.write(`\n--- ${result.tokens} tokens · ${totalMs}ms ---\n`);
        }
    }
}
|
|
4641
|
-
/**
 * Promise wrapper around `process.stdout.write`.
 *
 * @param {string|Uint8Array} data - Chunk to write.
 * @returns {Promise<void>} Settles from the write callback: rejects with the
 *   error it reports, otherwise resolves with no value.
 */
function writeStdout(data) {
    const executor = (onDone, onError) => {
        const afterWrite = (failure) => {
            if (failure) {
                onError(failure);
                return;
            }
            onDone();
        };
        process.stdout.write(data, afterWrite);
    };
    return new Promise(executor);
}
|
|
4651
|
-
/**
 * Convert an array of listing items to CSV.
 *
 * The header is the union of all keys that hold a non-undefined value in at
 * least one item, in first-seen order. Every data cell is double-quoted with
 * embedded quotes doubled; null/undefined become `""`. Header cells are
 * quoted only when they contain a quote, comma or line break, so plain keys
 * render exactly as before.
 *
 * @param {Array<Object>} items - Rows to serialise.
 * @returns {string} CSV text ending in a newline, or '' for an empty list.
 */
function formatListingsCsv(items) {
    if (items.length === 0)
        return '';
    // Collect all keys
    const keySet = new Set();
    for (const item of items) {
        for (const [key, value] of Object.entries(item)) {
            if (value !== undefined)
                keySet.add(key);
        }
    }
    const keys = Array.from(keySet);
    const quote = (text) => '"' + text.replace(/"/g, '""') + '"';
    const escapeCsv = (s) => (s === undefined || s === null ? '""' : quote(String(s)));
    // Previously header cells were emitted raw, so a key containing a comma
    // or quote shifted every column.
    const escapeHeader = (key) => (/[",\r\n]/.test(key) ? quote(key) : key);
    const lines = [keys.map(escapeHeader).join(',')];
    for (const item of items) {
        lines.push(keys.map((k) => escapeCsv(item[k])).join(','));
    }
    return lines.join('\n') + '\n';
}
|
|
4681
|
-
/**
 * Normalise the result of --extract (which may be a flat object or contain
 * arrays) into an array of row objects suitable for CSV / table rendering.
 *
 * When every value is an array, the arrays are zipped column-wise into rows.
 * The row count is the longest array's length, so uneven columns no longer
 * lose their trailing values; missing cells are `undefined`. Otherwise the
 * object is treated as one row. Non-null values are stringified.
 *
 * @param {Object} extracted - Field name → value or array of values.
 * @returns {Array<Object>} Rows keyed by the same field names.
 */
function normaliseExtractedToRows(extracted) {
    const toCell = (value) => (value != null ? String(value) : undefined);
    const keys = Object.keys(extracted);
    const values = Object.values(extracted);
    const allArrays = values.length > 0 && values.every((v) => Array.isArray(v));
    if (allArrays) {
        // Use the longest column: sizing by the first one dropped data
        // whenever a later column was longer.
        const length = Math.max(...values.map((column) => column.length));
        const rows = [];
        for (let i = 0; i < length; i++) {
            const row = {};
            for (const key of keys) {
                row[key] = toCell(extracted[key][i]);
            }
            rows.push(row);
        }
        return rows;
    }
    // Otherwise treat as a single row
    const row = {};
    for (const key of keys) {
        row[key] = toCell(extracted[key]);
    }
    return [row];
}
|
|
4709
|
-
/**
 * Collect hex colour literals (#RRGGBB or #RGB) from a text blob.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Up to 10 distinct matches in first-seen order
 *   (case-sensitive); an empty array when nothing matches.
 */
function extractColors(content) {
    const found = content.match(/#[0-9A-Fa-f]{6}|#[0-9A-Fa-f]{3}/g);
    if (!found) {
        return [];
    }
    // A Set keeps insertion order, so the first occurrence of each colour wins.
    const distinct = Array.from(new Set(found));
    return distinct.slice(0, 10);
}
|
|
4719
|
-
/**
 * Collect `font-family` declarations from CSS or inline-style text.
 *
 * Each value is captured up to the first `;`, `}`, quote or newline, then
 * trimmed. Blank captures are skipped: a declaration that starts with a
 * quoted name used to contribute an empty string to the result.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Up to 5 distinct non-empty values in first-seen order.
 */
function extractFonts(content) {
    const fonts = new Set();
    // matchAll avoids the shared lastIndex state of a /g regex driven by exec().
    for (const match of content.matchAll(/font-family:\s*([^;}"'\n]+)/gi)) {
        const family = match[1].trim();
        if (family !== '')
            fonts.add(family);
    }
    return [...fonts].slice(0, 5);
}
|
|
4729
|
-
//# sourceMappingURL=cli.js.map
|