webpeel 0.19.4 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cache.d.ts +0 -1
- package/dist/cache.js +0 -1
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +476 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1015 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +839 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +273 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +524 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +686 -0
- package/dist/cli-auth.d.ts +0 -1
- package/dist/cli-auth.js +0 -1
- package/dist/cli.d.ts +7 -6
- package/dist/cli.js +35 -4698
- package/dist/core/actions.d.ts +0 -1
- package/dist/core/actions.js +0 -1
- package/dist/core/agent.d.ts +0 -1
- package/dist/core/agent.js +9 -12
- package/dist/core/answer.d.ts +0 -1
- package/dist/core/answer.js +0 -1
- package/dist/core/application-tracker.d.ts +0 -1
- package/dist/core/application-tracker.js +0 -1
- package/dist/core/apply.d.ts +0 -1
- package/dist/core/apply.js +0 -1
- package/dist/core/auto-extract.d.ts +0 -1
- package/dist/core/auto-extract.js +0 -1
- package/dist/core/auto-interact.d.ts +0 -1
- package/dist/core/auto-interact.js +0 -1
- package/dist/core/bm25-filter.d.ts +0 -1
- package/dist/core/bm25-filter.js +0 -1
- package/dist/core/branding.d.ts +0 -1
- package/dist/core/branding.js +0 -1
- package/dist/core/browser-fetch.d.ts +0 -1
- package/dist/core/browser-fetch.js +17 -10
- package/dist/core/browser-pool.d.ts +0 -1
- package/dist/core/browser-pool.js +0 -1
- package/dist/core/budget.d.ts +0 -1
- package/dist/core/budget.js +0 -1
- package/dist/core/cache.d.ts +0 -1
- package/dist/core/cache.js +0 -1
- package/dist/core/cf-worker-proxy.d.ts +0 -1
- package/dist/core/cf-worker-proxy.js +0 -1
- package/dist/core/challenge-detection.d.ts +0 -1
- package/dist/core/challenge-detection.js +0 -1
- package/dist/core/change-tracking.d.ts +0 -1
- package/dist/core/change-tracking.js +0 -1
- package/dist/core/chunker.d.ts +0 -1
- package/dist/core/chunker.js +0 -1
- package/dist/core/chunking.d.ts +0 -1
- package/dist/core/chunking.js +0 -1
- package/dist/core/cloak-fetch.d.ts +0 -1
- package/dist/core/cloak-fetch.js +0 -1
- package/dist/core/content-pruner.d.ts +0 -1
- package/dist/core/content-pruner.js +0 -1
- package/dist/core/crawl-checkpoint.d.ts +0 -1
- package/dist/core/crawl-checkpoint.js +0 -1
- package/dist/core/crawler.d.ts +0 -1
- package/dist/core/crawler.js +6 -5
- package/dist/core/cycle-fetch.d.ts +0 -1
- package/dist/core/cycle-fetch.js +0 -1
- package/dist/core/deep-fetch.d.ts +0 -1
- package/dist/core/deep-fetch.js +0 -1
- package/dist/core/design-analysis.d.ts +0 -1
- package/dist/core/design-analysis.js +0 -1
- package/dist/core/design-compare.d.ts +0 -1
- package/dist/core/design-compare.js +0 -1
- package/dist/core/diff.d.ts +0 -1
- package/dist/core/diff.js +0 -1
- package/dist/core/dns-cache.d.ts +0 -1
- package/dist/core/dns-cache.js +0 -1
- package/dist/core/documents.d.ts +0 -1
- package/dist/core/documents.js +0 -1
- package/dist/core/domain-extractors.d.ts +0 -1
- package/dist/core/domain-extractors.js +0 -1
- package/dist/core/extract-inline.d.ts +0 -1
- package/dist/core/extract-inline.js +0 -1
- package/dist/core/extract-listings.d.ts +0 -1
- package/dist/core/extract-listings.js +0 -1
- package/dist/core/extract.d.ts +0 -1
- package/dist/core/extract.js +0 -1
- package/dist/core/fetcher.d.ts +0 -1
- package/dist/core/fetcher.js +0 -1
- package/dist/core/google-cache.d.ts +0 -1
- package/dist/core/google-cache.js +0 -1
- package/dist/core/hotel-search.d.ts +0 -1
- package/dist/core/hotel-search.js +0 -1
- package/dist/core/http-fetch.d.ts +0 -1
- package/dist/core/http-fetch.js +5 -7
- package/dist/core/human.d.ts +0 -1
- package/dist/core/human.js +0 -1
- package/dist/core/jobs.d.ts +0 -1
- package/dist/core/jobs.js +0 -1
- package/dist/core/json-ld.d.ts +0 -1
- package/dist/core/json-ld.js +0 -1
- package/dist/core/llm-extract.d.ts +0 -1
- package/dist/core/llm-extract.js +0 -1
- package/dist/core/logger.d.ts +17 -0
- package/dist/core/logger.js +44 -0
- package/dist/core/map.d.ts +0 -1
- package/dist/core/map.js +0 -1
- package/dist/core/markdown.d.ts +0 -1
- package/dist/core/markdown.js +0 -1
- package/dist/core/metadata.d.ts +0 -1
- package/dist/core/metadata.js +0 -1
- package/dist/core/paginate.d.ts +0 -1
- package/dist/core/paginate.js +0 -1
- package/dist/core/pdf.d.ts +0 -1
- package/dist/core/pdf.js +0 -1
- package/dist/core/peel-tls.d.ts +0 -1
- package/dist/core/peel-tls.js +0 -1
- package/dist/core/pipeline.d.ts +0 -1
- package/dist/core/pipeline.js +22 -25
- package/dist/core/profiles.d.ts +0 -1
- package/dist/core/profiles.js +0 -1
- package/dist/core/quick-answer.d.ts +0 -1
- package/dist/core/quick-answer.js +0 -1
- package/dist/core/rate-governor.d.ts +0 -1
- package/dist/core/rate-governor.js +0 -1
- package/dist/core/readability.d.ts +0 -1
- package/dist/core/readability.js +0 -1
- package/dist/core/research.d.ts +0 -1
- package/dist/core/research.js +0 -1
- package/dist/core/schema-extraction.d.ts +0 -1
- package/dist/core/schema-extraction.js +0 -1
- package/dist/core/schema-postprocess.d.ts +0 -1
- package/dist/core/schema-postprocess.js +0 -1
- package/dist/core/schema-templates.d.ts +0 -1
- package/dist/core/schema-templates.js +0 -1
- package/dist/core/screenshot.d.ts +0 -1
- package/dist/core/screenshot.js +0 -1
- package/dist/core/search-fallback.d.ts +0 -1
- package/dist/core/search-fallback.js +0 -1
- package/dist/core/search-provider.d.ts +0 -1
- package/dist/core/search-provider.js +18 -21
- package/dist/core/site-search.d.ts +0 -1
- package/dist/core/site-search.js +0 -1
- package/dist/core/sitemap.d.ts +0 -1
- package/dist/core/sitemap.js +0 -1
- package/dist/core/stealth-patches.d.ts +0 -1
- package/dist/core/stealth-patches.js +0 -1
- package/dist/core/stemmer.d.ts +0 -1
- package/dist/core/stemmer.js +0 -1
- package/dist/core/strategies.d.ts +6 -1
- package/dist/core/strategies.js +29 -41
- package/dist/core/strategy-hooks.d.ts +0 -1
- package/dist/core/strategy-hooks.js +0 -1
- package/dist/core/summarize.d.ts +0 -1
- package/dist/core/summarize.js +0 -1
- package/dist/core/synonyms.d.ts +0 -1
- package/dist/core/synonyms.js +0 -1
- package/dist/core/table-format.d.ts +0 -1
- package/dist/core/table-format.js +0 -1
- package/dist/core/timing.d.ts +0 -1
- package/dist/core/timing.js +0 -1
- package/dist/core/user-agents.d.ts +0 -1
- package/dist/core/user-agents.js +0 -1
- package/dist/core/watch-manager.d.ts +0 -1
- package/dist/core/watch-manager.js +0 -1
- package/dist/core/watch.d.ts +0 -1
- package/dist/core/watch.js +0 -1
- package/dist/core/youtube.d.ts +0 -1
- package/dist/core/youtube.js +0 -1
- package/dist/index.d.ts +8 -3
- package/dist/index.js +27 -3
- package/dist/integrations/index.d.ts +0 -1
- package/dist/integrations/index.js +0 -1
- package/dist/integrations/langchain.d.ts +0 -1
- package/dist/integrations/langchain.js +0 -1
- package/dist/integrations/llamaindex.d.ts +0 -1
- package/dist/integrations/llamaindex.js +0 -1
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +266 -0
- package/dist/mcp/handlers/extract.d.ts +6 -0
- package/dist/mcp/handlers/extract.js +102 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +61 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +31 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +63 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +3 -4
- package/dist/mcp/server.js +35 -1101
- package/dist/mcp/smart-router.d.ts +0 -1
- package/dist/mcp/smart-router.js +3 -1
- package/dist/types.d.ts +6 -1
- package/dist/types.js +0 -1
- package/package.json +3 -13
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/cli-auth.d.ts.map +0 -1
- package/dist/cli-auth.js.map +0 -1
- package/dist/cli.bundle.cjs +0 -159248
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/core/actions.d.ts.map +0 -1
- package/dist/core/actions.js.map +0 -1
- package/dist/core/agent.d.ts.map +0 -1
- package/dist/core/agent.js.map +0 -1
- package/dist/core/answer.d.ts.map +0 -1
- package/dist/core/answer.js.map +0 -1
- package/dist/core/application-tracker.d.ts.map +0 -1
- package/dist/core/application-tracker.js.map +0 -1
- package/dist/core/apply.d.ts.map +0 -1
- package/dist/core/apply.js.map +0 -1
- package/dist/core/auto-extract.d.ts.map +0 -1
- package/dist/core/auto-extract.js.map +0 -1
- package/dist/core/auto-interact.d.ts.map +0 -1
- package/dist/core/auto-interact.js.map +0 -1
- package/dist/core/bm25-filter.d.ts.map +0 -1
- package/dist/core/bm25-filter.js.map +0 -1
- package/dist/core/branding.d.ts.map +0 -1
- package/dist/core/branding.js.map +0 -1
- package/dist/core/browser-fetch.d.ts.map +0 -1
- package/dist/core/browser-fetch.js.map +0 -1
- package/dist/core/browser-pool.d.ts.map +0 -1
- package/dist/core/browser-pool.js.map +0 -1
- package/dist/core/budget.d.ts.map +0 -1
- package/dist/core/budget.js.map +0 -1
- package/dist/core/cache.d.ts.map +0 -1
- package/dist/core/cache.js.map +0 -1
- package/dist/core/cf-worker-proxy.d.ts.map +0 -1
- package/dist/core/cf-worker-proxy.js.map +0 -1
- package/dist/core/challenge-detection.d.ts.map +0 -1
- package/dist/core/challenge-detection.js.map +0 -1
- package/dist/core/change-tracking.d.ts.map +0 -1
- package/dist/core/change-tracking.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/chunking.d.ts.map +0 -1
- package/dist/core/chunking.js.map +0 -1
- package/dist/core/cloak-fetch.d.ts.map +0 -1
- package/dist/core/cloak-fetch.js.map +0 -1
- package/dist/core/content-pruner.d.ts.map +0 -1
- package/dist/core/content-pruner.js.map +0 -1
- package/dist/core/crawl-checkpoint.d.ts.map +0 -1
- package/dist/core/crawl-checkpoint.js.map +0 -1
- package/dist/core/crawler.d.ts.map +0 -1
- package/dist/core/crawler.js.map +0 -1
- package/dist/core/cycle-fetch.d.ts.map +0 -1
- package/dist/core/cycle-fetch.js.map +0 -1
- package/dist/core/deep-fetch.d.ts.map +0 -1
- package/dist/core/deep-fetch.js.map +0 -1
- package/dist/core/design-analysis.d.ts.map +0 -1
- package/dist/core/design-analysis.js.map +0 -1
- package/dist/core/design-compare.d.ts.map +0 -1
- package/dist/core/design-compare.js.map +0 -1
- package/dist/core/diff.d.ts.map +0 -1
- package/dist/core/diff.js.map +0 -1
- package/dist/core/dns-cache.d.ts.map +0 -1
- package/dist/core/dns-cache.js.map +0 -1
- package/dist/core/documents.d.ts.map +0 -1
- package/dist/core/documents.js.map +0 -1
- package/dist/core/domain-extractors.d.ts.map +0 -1
- package/dist/core/domain-extractors.js.map +0 -1
- package/dist/core/extract-inline.d.ts.map +0 -1
- package/dist/core/extract-inline.js.map +0 -1
- package/dist/core/extract-listings.d.ts.map +0 -1
- package/dist/core/extract-listings.js.map +0 -1
- package/dist/core/extract.d.ts.map +0 -1
- package/dist/core/extract.js.map +0 -1
- package/dist/core/fetcher.d.ts.map +0 -1
- package/dist/core/fetcher.js.map +0 -1
- package/dist/core/google-cache.d.ts.map +0 -1
- package/dist/core/google-cache.js.map +0 -1
- package/dist/core/hotel-search.d.ts.map +0 -1
- package/dist/core/hotel-search.js.map +0 -1
- package/dist/core/http-fetch.d.ts.map +0 -1
- package/dist/core/http-fetch.js.map +0 -1
- package/dist/core/human.d.ts.map +0 -1
- package/dist/core/human.js.map +0 -1
- package/dist/core/jobs.d.ts.map +0 -1
- package/dist/core/jobs.js.map +0 -1
- package/dist/core/json-ld.d.ts.map +0 -1
- package/dist/core/json-ld.js.map +0 -1
- package/dist/core/llm-extract.d.ts.map +0 -1
- package/dist/core/llm-extract.js.map +0 -1
- package/dist/core/map.d.ts.map +0 -1
- package/dist/core/map.js.map +0 -1
- package/dist/core/markdown.d.ts.map +0 -1
- package/dist/core/markdown.js.map +0 -1
- package/dist/core/metadata.d.ts.map +0 -1
- package/dist/core/metadata.js.map +0 -1
- package/dist/core/paginate.d.ts.map +0 -1
- package/dist/core/paginate.js.map +0 -1
- package/dist/core/pdf.d.ts.map +0 -1
- package/dist/core/pdf.js.map +0 -1
- package/dist/core/peel-tls.d.ts.map +0 -1
- package/dist/core/peel-tls.js.map +0 -1
- package/dist/core/pipeline.d.ts.map +0 -1
- package/dist/core/pipeline.js.map +0 -1
- package/dist/core/profiles.d.ts.map +0 -1
- package/dist/core/profiles.js.map +0 -1
- package/dist/core/quick-answer.d.ts.map +0 -1
- package/dist/core/quick-answer.js.map +0 -1
- package/dist/core/rate-governor.d.ts.map +0 -1
- package/dist/core/rate-governor.js.map +0 -1
- package/dist/core/readability.d.ts.map +0 -1
- package/dist/core/readability.js.map +0 -1
- package/dist/core/research.d.ts.map +0 -1
- package/dist/core/research.js.map +0 -1
- package/dist/core/schema-extraction.d.ts.map +0 -1
- package/dist/core/schema-extraction.js.map +0 -1
- package/dist/core/schema-postprocess.d.ts.map +0 -1
- package/dist/core/schema-postprocess.js.map +0 -1
- package/dist/core/schema-templates.d.ts.map +0 -1
- package/dist/core/schema-templates.js.map +0 -1
- package/dist/core/screenshot.d.ts.map +0 -1
- package/dist/core/screenshot.js.map +0 -1
- package/dist/core/search-fallback.d.ts.map +0 -1
- package/dist/core/search-fallback.js.map +0 -1
- package/dist/core/search-provider.d.ts.map +0 -1
- package/dist/core/search-provider.js.map +0 -1
- package/dist/core/site-search.d.ts.map +0 -1
- package/dist/core/site-search.js.map +0 -1
- package/dist/core/sitemap.d.ts.map +0 -1
- package/dist/core/sitemap.js.map +0 -1
- package/dist/core/stealth-patches.d.ts.map +0 -1
- package/dist/core/stealth-patches.js.map +0 -1
- package/dist/core/stemmer.d.ts.map +0 -1
- package/dist/core/stemmer.js.map +0 -1
- package/dist/core/strategies.d.ts.map +0 -1
- package/dist/core/strategies.js.map +0 -1
- package/dist/core/strategy-hooks.d.ts.map +0 -1
- package/dist/core/strategy-hooks.js.map +0 -1
- package/dist/core/summarize.d.ts.map +0 -1
- package/dist/core/summarize.js.map +0 -1
- package/dist/core/synonyms.d.ts.map +0 -1
- package/dist/core/synonyms.js.map +0 -1
- package/dist/core/table-format.d.ts.map +0 -1
- package/dist/core/table-format.js.map +0 -1
- package/dist/core/timing.d.ts.map +0 -1
- package/dist/core/timing.js.map +0 -1
- package/dist/core/user-agents.d.ts.map +0 -1
- package/dist/core/user-agents.js.map +0 -1
- package/dist/core/watch-manager.d.ts.map +0 -1
- package/dist/core/watch-manager.js.map +0 -1
- package/dist/core/watch.d.ts.map +0 -1
- package/dist/core/watch.js.map +0 -1
- package/dist/core/youtube.d.ts.map +0 -1
- package/dist/core/youtube.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/integrations/index.d.ts.map +0 -1
- package/dist/integrations/index.js.map +0 -1
- package/dist/integrations/langchain.d.ts.map +0 -1
- package/dist/integrations/langchain.js.map +0 -1
- package/dist/integrations/llamaindex.d.ts.map +0 -1
- package/dist/integrations/llamaindex.js.map +0 -1
- package/dist/mcp/server.d.ts.map +0 -1
- package/dist/mcp/server.js.map +0 -1
- package/dist/mcp/smart-router.d.ts.map +0 -1
- package/dist/mcp/smart-router.js.map +0 -1
- package/dist/server/app.d.ts +0 -15
- package/dist/server/app.d.ts.map +0 -1
- package/dist/server/app.js +0 -350
- package/dist/server/app.js.map +0 -1
- package/dist/server/auth-store.d.ts +0 -28
- package/dist/server/auth-store.d.ts.map +0 -1
- package/dist/server/auth-store.js +0 -89
- package/dist/server/auth-store.js.map +0 -1
- package/dist/server/email-service.d.ts +0 -22
- package/dist/server/email-service.d.ts.map +0 -1
- package/dist/server/email-service.js +0 -80
- package/dist/server/email-service.js.map +0 -1
- package/dist/server/job-queue.d.ts +0 -93
- package/dist/server/job-queue.d.ts.map +0 -1
- package/dist/server/job-queue.js +0 -146
- package/dist/server/job-queue.js.map +0 -1
- package/dist/server/logger.d.ts +0 -11
- package/dist/server/logger.d.ts.map +0 -1
- package/dist/server/logger.js +0 -38
- package/dist/server/logger.js.map +0 -1
- package/dist/server/middleware/auth.d.ts +0 -29
- package/dist/server/middleware/auth.d.ts.map +0 -1
- package/dist/server/middleware/auth.js +0 -222
- package/dist/server/middleware/auth.js.map +0 -1
- package/dist/server/middleware/rate-limit.d.ts +0 -25
- package/dist/server/middleware/rate-limit.d.ts.map +0 -1
- package/dist/server/middleware/rate-limit.js +0 -168
- package/dist/server/middleware/rate-limit.js.map +0 -1
- package/dist/server/middleware/url-validator.d.ts +0 -16
- package/dist/server/middleware/url-validator.d.ts.map +0 -1
- package/dist/server/middleware/url-validator.js +0 -187
- package/dist/server/middleware/url-validator.js.map +0 -1
- package/dist/server/openapi.yaml +0 -4944
- package/dist/server/pg-auth-store.d.ts +0 -133
- package/dist/server/pg-auth-store.d.ts.map +0 -1
- package/dist/server/pg-auth-store.js +0 -473
- package/dist/server/pg-auth-store.js.map +0 -1
- package/dist/server/pg-job-queue.d.ts +0 -60
- package/dist/server/pg-job-queue.d.ts.map +0 -1
- package/dist/server/pg-job-queue.js +0 -365
- package/dist/server/pg-job-queue.js.map +0 -1
- package/dist/server/premium/domain-intel.d.ts +0 -17
- package/dist/server/premium/domain-intel.d.ts.map +0 -1
- package/dist/server/premium/domain-intel.js +0 -134
- package/dist/server/premium/domain-intel.js.map +0 -1
- package/dist/server/premium/index.d.ts +0 -18
- package/dist/server/premium/index.d.ts.map +0 -1
- package/dist/server/premium/index.js +0 -36
- package/dist/server/premium/index.js.map +0 -1
- package/dist/server/premium/swr-cache.d.ts +0 -15
- package/dist/server/premium/swr-cache.d.ts.map +0 -1
- package/dist/server/premium/swr-cache.js +0 -35
- package/dist/server/premium/swr-cache.js.map +0 -1
- package/dist/server/routes/activity.d.ts +0 -7
- package/dist/server/routes/activity.d.ts.map +0 -1
- package/dist/server/routes/activity.js +0 -68
- package/dist/server/routes/activity.js.map +0 -1
- package/dist/server/routes/agent.d.ts +0 -16
- package/dist/server/routes/agent.d.ts.map +0 -1
- package/dist/server/routes/agent.js +0 -247
- package/dist/server/routes/agent.js.map +0 -1
- package/dist/server/routes/answer.d.ts +0 -6
- package/dist/server/routes/answer.d.ts.map +0 -1
- package/dist/server/routes/answer.js +0 -133
- package/dist/server/routes/answer.js.map +0 -1
- package/dist/server/routes/ask.d.ts +0 -23
- package/dist/server/routes/ask.d.ts.map +0 -1
- package/dist/server/routes/ask.js +0 -119
- package/dist/server/routes/ask.js.map +0 -1
- package/dist/server/routes/batch.d.ts +0 -7
- package/dist/server/routes/batch.d.ts.map +0 -1
- package/dist/server/routes/batch.js +0 -412
- package/dist/server/routes/batch.js.map +0 -1
- package/dist/server/routes/cli-usage.d.ts +0 -7
- package/dist/server/routes/cli-usage.d.ts.map +0 -1
- package/dist/server/routes/cli-usage.js +0 -121
- package/dist/server/routes/cli-usage.js.map +0 -1
- package/dist/server/routes/compat.d.ts +0 -24
- package/dist/server/routes/compat.d.ts.map +0 -1
- package/dist/server/routes/compat.js +0 -653
- package/dist/server/routes/compat.js.map +0 -1
- package/dist/server/routes/deep-fetch.d.ts +0 -9
- package/dist/server/routes/deep-fetch.d.ts.map +0 -1
- package/dist/server/routes/deep-fetch.js +0 -50
- package/dist/server/routes/deep-fetch.js.map +0 -1
- package/dist/server/routes/demo.d.ts +0 -25
- package/dist/server/routes/demo.d.ts.map +0 -1
- package/dist/server/routes/demo.js +0 -434
- package/dist/server/routes/demo.js.map +0 -1
- package/dist/server/routes/extract.d.ts +0 -9
- package/dist/server/routes/extract.d.ts.map +0 -1
- package/dist/server/routes/extract.js +0 -150
- package/dist/server/routes/extract.js.map +0 -1
- package/dist/server/routes/fetch.d.ts +0 -8
- package/dist/server/routes/fetch.d.ts.map +0 -1
- package/dist/server/routes/fetch.js +0 -988
- package/dist/server/routes/fetch.js.map +0 -1
- package/dist/server/routes/health.d.ts +0 -8
- package/dist/server/routes/health.d.ts.map +0 -1
- package/dist/server/routes/health.js +0 -20
- package/dist/server/routes/health.js.map +0 -1
- package/dist/server/routes/jobs.d.ts +0 -8
- package/dist/server/routes/jobs.d.ts.map +0 -1
- package/dist/server/routes/jobs.js +0 -487
- package/dist/server/routes/jobs.js.map +0 -1
- package/dist/server/routes/mcp.d.ts +0 -18
- package/dist/server/routes/mcp.d.ts.map +0 -1
- package/dist/server/routes/mcp.js +0 -1260
- package/dist/server/routes/mcp.js.map +0 -1
- package/dist/server/routes/oauth.d.ts +0 -10
- package/dist/server/routes/oauth.d.ts.map +0 -1
- package/dist/server/routes/oauth.js +0 -334
- package/dist/server/routes/oauth.js.map +0 -1
- package/dist/server/routes/quick-answer.d.ts +0 -9
- package/dist/server/routes/quick-answer.d.ts.map +0 -1
- package/dist/server/routes/quick-answer.js +0 -93
- package/dist/server/routes/quick-answer.js.map +0 -1
- package/dist/server/routes/screenshot.d.ts +0 -23
- package/dist/server/routes/screenshot.d.ts.map +0 -1
- package/dist/server/routes/screenshot.js +0 -819
- package/dist/server/routes/screenshot.js.map +0 -1
- package/dist/server/routes/search.d.ts +0 -7
- package/dist/server/routes/search.d.ts.map +0 -1
- package/dist/server/routes/search.js +0 -312
- package/dist/server/routes/search.js.map +0 -1
- package/dist/server/routes/session.d.ts +0 -16
- package/dist/server/routes/session.d.ts.map +0 -1
- package/dist/server/routes/session.js +0 -278
- package/dist/server/routes/session.js.map +0 -1
- package/dist/server/routes/stats.d.ts +0 -7
- package/dist/server/routes/stats.d.ts.map +0 -1
- package/dist/server/routes/stats.js +0 -65
- package/dist/server/routes/stats.js.map +0 -1
- package/dist/server/routes/stripe.d.ts +0 -16
- package/dist/server/routes/stripe.d.ts.map +0 -1
- package/dist/server/routes/stripe.js +0 -283
- package/dist/server/routes/stripe.js.map +0 -1
- package/dist/server/routes/users.d.ts +0 -9
- package/dist/server/routes/users.d.ts.map +0 -1
- package/dist/server/routes/users.js +0 -1211
- package/dist/server/routes/users.js.map +0 -1
- package/dist/server/routes/watch.d.ts +0 -16
- package/dist/server/routes/watch.d.ts.map +0 -1
- package/dist/server/routes/watch.js +0 -257
- package/dist/server/routes/watch.js.map +0 -1
- package/dist/server/routes/webhooks.d.ts +0 -16
- package/dist/server/routes/webhooks.d.ts.map +0 -1
- package/dist/server/routes/webhooks.js +0 -74
- package/dist/server/routes/webhooks.js.map +0 -1
- package/dist/server/routes/youtube.d.ts +0 -7
- package/dist/server/routes/youtube.d.ts.map +0 -1
- package/dist/server/routes/youtube.js +0 -93
- package/dist/server/routes/youtube.js.map +0 -1
- package/dist/server/sentry.d.ts +0 -14
- package/dist/server/sentry.d.ts.map +0 -1
- package/dist/server/sentry.js +0 -39
- package/dist/server/sentry.js.map +0 -1
- package/dist/server/types.d.ts +0 -16
- package/dist/server/types.d.ts.map +0 -1
- package/dist/server/types.js +0 -8
- package/dist/server/types.js.map +0 -1
- package/dist/server/utils/response.d.ts +0 -45
- package/dist/server/utils/response.d.ts.map +0 -1
- package/dist/server/utils/response.js +0 -70
- package/dist/server/utils/response.js.map +0 -1
- package/dist/server/utils/sse.d.ts +0 -23
- package/dist/server/utils/sse.d.ts.map +0 -1
- package/dist/server/utils/sse.js +0 -39
- package/dist/server/utils/sse.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,686 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared CLI utilities — config, API client, output formatting, helpers.
|
|
3
|
+
* Imported by all command modules.
|
|
4
|
+
*/
|
|
5
|
+
import { readFileSync } from 'fs';
|
|
6
|
+
import { fileURLToPath } from 'url';
|
|
7
|
+
import { dirname, resolve } from 'path';
|
|
8
|
+
// ─── CLI version ────────────────────────────────────────────────────────────
|
|
9
|
+
// Version reported when package.json cannot be read or carries no usable version.
let _cliVersion = '0.0.0';
try {
    const __filename = fileURLToPath(import.meta.url);
    const __dirname = dirname(__filename);
    // utils.ts compiles to dist/cli/utils.js, so the package root (and its
    // package.json) is two directories up: dist/cli/../../package.json
    const pkgPath = resolve(__dirname, '..', '..', 'package.json');
    const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
    // Only accept a real version string; otherwise keep the '0.0.0' fallback so
    // the exported value is never undefined (which would print as "vundefined").
    if (pkg !== null && typeof pkg === 'object' && typeof pkg.version === 'string' && pkg.version !== '') {
        _cliVersion = pkg.version;
    }
}
catch { /* unreadable or malformed package.json — fall back to 0.0.0 */ }
/** Version of the running CLI, or '0.0.0' when it could not be determined. */
export const cliVersion = _cliVersion;
|
|
20
|
+
// ─── Verb aliases ────────────────────────────────────────────────────────────
|
|
21
|
+
// Verb-first invocations are intercepted before Commander parses them:
//   "webpeel fetch <url>"  →  "webpeel <url>"
// 'read' is deliberately left out — it is a registered subcommand.
export const VERB_ALIASES = new Set()
    .add('fetch')
    .add('get')
    .add('scrape')
    .add('peel');
|
|
25
|
+
// ─── Update check ────────────────────────────────────────────────────────────
|
|
26
|
+
/**
 * Best-effort check of the npm registry for a newer published release.
 *
 * Prints a one-line hint to stderr when the registry's `latest` version is
 * newer than the running CLI. Nothing is printed when the local version is
 * equal to or ahead of the registry (for example a local build), or when the
 * local version is unknown ('0.0.0'). The request is aborted after 2 seconds
 * and every failure is swallowed so the check can never break a command.
 *
 * @returns {Promise<void>}
 */
export async function checkForUpdates() {
    // Split "1.2.3-beta.1+build" into its numeric core and a pre-release flag.
    const parseVersion = (version) => {
        const [core, ...preRelease] = String(version).trim().replace(/^v/, '').split('+')[0].split('-');
        return {
            core: core.split('.').map((part) => Number.parseInt(part, 10)),
            isPreRelease: preRelease.length > 0,
        };
    };
    // True when `candidate` should be offered as an upgrade over `current`.
    const isNewer = (candidate, current) => {
        const next = parseVersion(candidate);
        const mine = parseVersion(current);
        for (let i = 0; i < 3; i++) {
            const a = next.core[i] ?? 0;
            const b = mine.core[i] ?? 0;
            // Unparseable version: fall back to the plain "is it different" check.
            if (Number.isNaN(a) || Number.isNaN(b)) {
                return candidate !== current;
            }
            if (a !== b) {
                return a > b;
            }
        }
        // Same major.minor.patch: a release outranks a pre-release of that version.
        if (next.isPreRelease !== mine.isPreRelease) {
            return mine.isPreRelease;
        }
        // Two different pre-releases of the same version are not ordered here; notify.
        return next.isPreRelease && candidate !== current;
    };
    // Without a known local version there is nothing to compare — skip the request.
    if (typeof cliVersion !== 'string' || cliVersion === '0.0.0') {
        return;
    }
    try {
        const res = await fetch('https://registry.npmjs.org/webpeel/latest', {
            signal: AbortSignal.timeout(2000),
        });
        if (!res.ok) {
            return;
        }
        const data = await res.json();
        const latest = data.version;
        if (typeof latest === 'string' && latest !== '' && isNewer(latest, cliVersion)) {
            console.error(`\n💡 WebPeel v${latest} available (you have v${cliVersion}). Update: npm i -g webpeel@latest\n`);
        }
    }
    catch { /* silently ignore — don't slow down the user */ }
}
|
|
41
|
+
// ─── ANSI helpers ────────────────────────────────────────────────────────────
|
|
42
|
+
/** True when colour must be suppressed: NO_COLOR is set (to anything) or stdout is not a TTY. */
export const NO_COLOR = !(process.env.NO_COLOR === undefined && process.stdout.isTTY);
/** Wrap `s` in the ANSI bold sequence, or return it untouched when colour is off. */
export const bold = (s) => {
    if (NO_COLOR) {
        return s;
    }
    return `\x1b[1m${s}\x1b[0m`;
};
/** Wrap `s` in the ANSI dim sequence, or return it untouched when colour is off. */
export const dim = (s) => {
    if (NO_COLOR) {
        return s;
    }
    return `\x1b[2m${s}\x1b[0m`;
};
/** Wrap `s` in the ANSI cyan sequence, or return it untouched when colour is off. */
export const cyan = (s) => {
    if (NO_COLOR) {
        return s;
    }
    return `\x1b[36m${s}\x1b[0m`;
};
|
|
46
|
+
// ─── Parse page actions ──────────────────────────────────────────────────────
|
|
47
|
+
/**
 * Parse CLI action strings into an array of page-action objects.
 *
 * Each string has the form `<type>[:<argument>]`. Only the first colon
 * separates the type from its argument, so selectors and text may themselves
 * contain colons. For type/fill/select only the first `=` separates the
 * selector from the value.
 *
 * Formats:
 *   click:.selector         — click an element
 *   type:.selector=text     — type text into an input
 *   fill:.selector=text     — fill an input (replaces existing value)
 *   select:.selector=value  — select dropdown option
 *   scroll:down:500         — scroll up/down/left/right by an amount (default 500)
 *   scroll:0,1500           — scroll to an x,y position
 *   scroll:1500             — scroll to an absolute position
 *   scroll:bottom           — scroll to bottom (legacy; also the fallback)
 *   scroll:top              — scroll to top (legacy)
 *   wait:2000               — wait N ms (1000 when missing, zero or not a number)
 *   press:Enter             — press a keyboard key
 *   hover:.selector         — hover over an element
 *   waitFor:.selector       — wait for a selector to appear
 *   wait-for:.selector      — same, with `timeout: 10000`
 *   screenshot              — take a screenshot
 *
 * @param {string[]} actionStrings - Raw action strings, one action each.
 * @returns {object[]} One action object per input string, in the same order.
 * @throws {Error} When a string starts with an unknown action type.
 */
export function parseActions(actionStrings) {
    // Turn the argument of a scroll action into its action object.
    const parseScroll = (argument) => {
        const parts = argument.split(':');
        const target = parts[0];
        // scroll:x,y (e.g. scroll:0,1500)
        if (target.includes(',')) {
            const [x, y] = target.split(',').map(Number);
            if (!Number.isNaN(x) && !Number.isNaN(y)) {
                return { type: 'scroll', to: { x, y } };
            }
        }
        if (target === 'top' || target === 'bottom') {
            return { type: 'scroll', to: target };
        }
        if (target === 'down' || target === 'up' || target === 'left' || target === 'right') {
            const parsed = Number.parseInt(parts[1] ?? '', 10);
            // A missing or non-numeric amount falls back to 500 instead of leaking NaN.
            return { type: 'scroll', direction: target, amount: Number.isNaN(parsed) ? 500 : parsed };
        }
        // Bare number: absolute position
        const position = Number.parseInt(target, 10);
        if (!Number.isNaN(position)) {
            return { type: 'scroll', to: position };
        }
        // Anything else: scroll to bottom
        return { type: 'scroll', to: 'bottom' };
    };
    return actionStrings.map((str) => {
        const [type, ...rest] = str.split(':');
        const value = rest.join(':');
        switch (type) {
            case 'wait':
                return { type: 'wait', ms: Number.parseInt(value, 10) || 1000 };
            case 'click':
                return { type: 'click', selector: value };
            case 'scroll':
                return parseScroll(value);
            case 'type':
            case 'fill':
            case 'select': {
                // Everything after the first '=' is the value, so it may contain '='.
                const [selector, ...tail] = value.split('=');
                return { type, selector, value: tail.join('=') };
            }
            case 'press':
                return { type: 'press', key: value };
            case 'hover':
                return { type: 'hover', selector: value };
            case 'waitFor':
                return { type: 'waitForSelector', selector: value };
            case 'wait-for':
                return { type: 'waitForSelector', selector: value, timeout: 10000 };
            case 'screenshot':
                return { type: 'screenshot' };
            default:
                throw new Error(`Unknown action type: ${type}`);
        }
    });
}
|
|
125
|
+
// ─── Format error ────────────────────────────────────────────────────────────
|
|
126
|
+
/**
 * Render an error as a red headline followed by yellow "💡" hints that are
 * selected by looking for known substrings in the error message.
 *
 * Only the first matching category contributes hints, so the order of the
 * checks is significant (for example "rate limit" is tested before the more
 * generic "limit" quota check).
 *
 * @param {unknown} error - Error instance or any thrown value; values without
 *   a `message` are stringified.
 * @param {string} [_url] - Unused; kept so existing call sites keep working.
 * @param {{render?: boolean, stealth?: boolean}} [options] - Flags already in
 *   effect; a hint suggesting a flag is suppressed when that flag is set.
 * @returns {string} Newline-joined, ANSI-coloured message.
 */
export function formatError(error, _url, options = {}) {
    // A default parameter only covers `undefined`; tolerate an explicit null too.
    const flags = options ?? {};
    // `error` may be null/undefined or a primitive when something non-Error was thrown.
    const msg = error?.message || String(error);
    const lower = msg.toLowerCase();
    const has = (...needles) => needles.some((needle) => msg.includes(needle));
    const hasAnyCase = (...needles) => needles.some((needle) => lower.includes(needle));
    const lines = [`\x1b[31m✖ ${msg}\x1b[0m`];
    const hint = (text) => {
        lines.push(`\x1b[33m💡 ${text}\x1b[0m`);
    };
    if (has('net::ERR_', 'ECONNREFUSED', 'ENOTFOUND')) {
        hint('Check the URL is correct and the site is accessible.');
    }
    else if (has('timeout', 'Timeout')) {
        hint('Try increasing timeout: --timeout 60000');
        if (!flags.render) {
            hint('Site may need browser rendering: --render');
        }
    }
    else if (has('blocked', '403', 'Access Denied', 'challenge')) {
        if (!flags.stealth) {
            hint('Try stealth mode to bypass bot detection: --stealth');
        }
        hint('Try a different user agent: --ua "Mozilla/5.0..."');
    }
    else if (has('empty', 'no content', '0 tokens')) {
        if (!flags.render) {
            hint('Page may be JavaScript-rendered. Try: --render');
        }
        else if (!flags.stealth) {
            hint('Content may be behind bot detection. Try: --stealth');
        }
        hint('Try waiting longer for content: --wait 5000');
    }
    else if (has('captcha', 'CAPTCHA', 'Captcha')) {
        hint('This site requires CAPTCHA solving. Try a browser profile: --profile mysite --headed');
    }
    else if (has('rate limit', '429')) {
        hint('Rate limited. Wait a moment and try again, or use --proxy.');
    }
    else if (hasAnyCase('enotfound', 'getaddrinfo')) {
        // Upper-case ENOTFOUND is already handled by the first branch; this
        // catches other casings and bare getaddrinfo failures.
        hint('Could not resolve hostname. Check the URL is correct.');
    }
    else if (hasAnyCase('certificate', 'ssl', 'tls')) {
        hint('SSL/TLS error. The site may have an invalid certificate.');
    }
    else if (hasAnyCase('usage', 'quota', 'limit')) {
        hint('Run `webpeel usage` to check your quota, or `webpeel login` to authenticate.');
    }
    return lines.join('\n');
}
|
|
173
|
+
// ─── API-based fetch ─────────────────────────────────────────────────────────
|
|
174
|
+
/**
 * Fetch a page through the WebPeel HTTP API (`GET {apiUrl}/v1/fetch`) and
 * adapt the JSON reply to the result object the CLI prints.
 *
 * The request carries the API key as a Bearer token and is aborted after
 * 60 seconds.
 *
 * @param {string} url - Page to fetch.
 * @param {object} options - CLI options; `format`, `html`, `text`, `render`,
 *   `stealth`, `wait`, `selector`, `readable`, `summary`, `budget` and
 *   `question` are read.
 * @param {string} apiKey - Key sent in the Authorization header.
 * @param {string} apiUrl - API base URL (no trailing slash is added or removed).
 * @returns {Promise<object>} Result with url, title, content, method, tokens,
 *   elapsed, metadata, links and the optional answer/summary fields.
 * @throws {Error} With `code: 'AUTH_FAILED'` on HTTP 401, `code: 'RATE_LIMITED'`
 *   on HTTP 429, or a plain "API error <status>" for any other non-OK status.
 */
export async function fetchViaApi(url, options, apiKey, apiUrl) {
    // --format is a CLI output flag; only content formats are forwarded to the API.
    const requested = (options.format || '').toLowerCase();
    let apiFormat;
    if (['text', 'html', 'markdown', 'md'].includes(requested)) {
        apiFormat = requested === 'md' ? 'markdown' : requested;
    }
    else if (options.html) {
        apiFormat = 'html';
    }
    else if (options.text) {
        apiFormat = 'text';
    }
    else {
        apiFormat = 'markdown';
    }
    const query = new URLSearchParams({ url, format: apiFormat });
    // Forwarded in this order; boolean flags are sent as the literal 'true'.
    const forwarded = [
        ['render', () => 'true'],
        ['stealth', () => 'true'],
        ['wait', (value) => String(value)],
        ['selector', (value) => value],
        ['readable', () => 'true'],
        ['summary', () => 'true'],
        ['budget', (value) => String(value)],
        ['question', (value) => value],
    ];
    for (const [name, encode] of forwarded) {
        const value = options[name];
        if (value) {
            query.set(name, encode(value));
        }
    }
    const res = await fetch(`${apiUrl}/v1/fetch?${query}`, {
        headers: { Authorization: `Bearer ${apiKey}` },
        signal: AbortSignal.timeout(60000),
    });
    switch (res.status) {
        case 401:
            throw Object.assign(new Error('API key invalid or expired. Run: webpeel auth <new-key>'), { code: 'AUTH_FAILED' });
        case 429:
            throw Object.assign(new Error('Rate limit exceeded. Check your plan at https://app.webpeel.dev/billing'), { code: 'RATE_LIMITED' });
        default:
            break;
    }
    if (!res.ok) {
        // Body is best-effort context for the message; an unreadable body becomes ''.
        const body = await res.text().catch(() => '');
        throw new Error(`API error ${res.status}: ${body.slice(0, 200)}`);
    }
    const data = await res.json();
    // Map the API response onto the result shape the CLI already handles.
    const title = data.metadata?.title || data.title || '';
    const tokens = data.tokenCount || data.tokens || 0;
    const elapsed = data.fetchTimeMs || data.elapsed || 0;
    return {
        url: data.url || url,
        title,
        content: data.content || '',
        method: data.method || 'simple',
        tokens,
        elapsed,
        tokenSavingsPercent: data.tokenSavingsPercent,
        rawTokenEstimate: data.rawTokenEstimate,
        metadata: data.metadata || {},
        links: data.links || [],
        answer: data.answer,
        summary: data.summary,
        format: options.format || 'markdown',
    };
}
|
|
232
|
+
// ─── Help formatting ─────────────────────────────────────────────────────────
|
|
233
|
+
/**
 * Assemble plain-text help for a command from a Commander-style help helper:
 * a usage line, the optional description, then "Arguments:", "Options:" and
 * "Commands:" sections for whatever the helper reports as visible.
 *
 * When `cmd.parent === null` (the root command) a fixed reference block of
 * grouped options and examples is appended.
 *
 * @param {object} cmd - Command being described; only `parent` is read here,
 *   everything else goes through `helper`.
 * @param {object} helper - Object providing padWidth, helpWidth, wrap and the
 *   visible* / *Term / *Description accessors used below.
 * @returns {string} The help text, lines joined with "\n".
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function buildCommanderHelp(cmd, helper) {
    const termColumn = helper.padWidth(cmd, helper);
    const lineWidth = helper.helpWidth ?? 80;
    const indent = ' ';
    // One "term  description" row; rows without a description are just the term.
    const renderRow = (term, description) => {
        if (!description) {
            return term;
        }
        const row = `${term.padEnd(termColumn + 2)}${description}`;
        return helper.wrap(row, lineWidth - indent.length, termColumn + 2);
    };
    // Prefix every line (including wrapped continuation lines) with the indent.
    const indentAll = (rows) => rows.join('\n').replace(/^/gm, indent);
    const sections = [`Usage: ${helper.commandUsage(cmd)}`, ''];
    const description = helper.commandDescription(cmd);
    if (description.length > 0) {
        sections.push(helper.wrap(description, lineWidth, 0), '');
    }
    const addSection = (heading, rows) => {
        if (rows.length > 0) {
            sections.push(heading, indentAll(rows), '');
        }
    };
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    addSection('Arguments:', helper.visibleArguments(cmd).map((arg) => renderRow(helper.argumentTerm(arg), helper.argumentDescription(arg))));
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    addSection('Options:', helper.visibleOptions(cmd).map((opt) => renderRow(helper.optionTerm(opt), helper.optionDescription(opt))));
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    addSection('Commands:', helper.visibleCommands(cmd).map((sub) => renderRow(helper.subcommandTerm(sub), helper.subcommandDescription(sub))));
    // Grouped option sections are appended only on the root command (--help-all).
    if (cmd.parent === null) {
        sections.push(`
Output Formats:
--json JSON output with full metadata
--html Raw HTML output
--text Plain text output
--csv / --table Tabular output for extractions
-s, --silent No spinner or progress output

Content Control:
--readable Reader mode — clean article content only
--budget <n> Smart token budget (no LLM key needed)
--focus <query> BM25 query-focused filtering
--selector <css> Extract specific CSS selector
--only-main-content Just main/article content
--full-content Disable content pruning
-q, --question <q> Ask a question about the content

Rendering:
-r, --render Browser rendering for JS-heavy sites
--stealth Stealth mode for bot-protected sites
--profile <path> Persistent browser profile
--headed Visible browser (for debugging)
--action <actions> Browser automation (click, type, scroll...)

Extraction:
--extract <json> CSS selector extraction
--extract-all Auto-detect listing items
--schema <name> Named extraction schema
--llm-extract [inst] LLM-powered extraction (BYOK)

Examples:
$ webpeel "https://example.com" Basic fetch
$ webpeel "https://youtube.com/watch?v=..." --json YouTube transcript
$ webpeel "https://openai.com/pricing" -q "GPT-4 cost?" Quick answer
$ webpeel "https://nytimes.com/article" --readable Reader mode
$ webpeel search "best restaurants in NYC" Web search
$ webpeel hotels "Manhattan" --checkin tomorrow Hotel search

Agent Integration:
$ webpeel mcp Start MCP server
$ cat urls.txt | webpeel batch Batch from stdin
$ webpeel pipe "https://example.com" | jq .content Pipe-friendly JSON
$ webpeel "https://site.com" --json --silent Same as pipe
$ curl https://webpeel.dev/llms.txt AI-readable docs
`);
    }
    return sections.join('\n');
}
|
|
320
|
+
/**
 * Build the condensed help screen shown for the root command's default
 * `--help`: banner with version, usage, a few examples, the most common
 * commands and options, and pointers to the fuller help.
 *
 * Relies on the module-level `cliVersion` value and the `bold` / `dim` /
 * `cyan` styling helpers.
 *
 * @returns {string} Help text, lines joined with "\n".
 */
export function buildCondensedHelp() {
    const v = cliVersion;
    const banner = [
        '',
        ` ${bold('◆ WebPeel')} ${dim(`v${v}`)}`,
        ` ${dim('The web data platform for AI agents')}`,
        '',
    ];
    const usage = [
        ` ${bold('Usage:')} webpeel [url] [options]`,
        ` webpeel <command> [options]`,
        '',
    ];
    const examples = [
        ` ${bold('Examples:')}`,
        ` webpeel https://example.com ${dim('Clean content (reader mode)')}`,
        ` webpeel read https://example.com ${dim('Explicit reader mode')}`,
        ` webpeel screenshot https://example.com ${dim('Screenshot any page')}`,
        ` webpeel ask https://news.com "summary" ${dim('Ask about any page')}`,
        ` webpeel search "webpeel vs jina" ${dim('Web search')}`,
        ` echo "url" | webpeel ${dim('Pipe mode (auto JSON)')}`,
        '',
    ];
    const commands = [
        ` ${bold('Commands:')}`,
        ` fetch (default) Fetch a URL as clean markdown`,
        ` read <url> Reader mode (article content only)`,
        ` screenshot <url> Take a screenshot`,
        ` ask <url> <question> Ask about any page`,
        ` search <query> Search the web (DuckDuckGo + sources)`,
        ` crawl <url> Crawl a website`,
        ` mcp Start MCP server for AI tools`,
        ` ${dim('... (use --help-all for all 25+ commands)')}`,
        '',
    ];
    const commonOptions = [
        ` ${bold('Common Options:')}`,
        ` -r, --render Browser rendering (JS-heavy sites)`,
        ` --stealth Stealth mode (anti-bot bypass)`,
        ` --raw Full page (disable auto reader mode)`,
        ` --full Full page, no budget limit`,
        ` --json JSON output with metadata`,
        // NOTE(review): this line previously read "--budget: 4000)" — unbalanced
        // and missing its argument/description. Wording restored on the
        // assumption that 4000 is the default budget; confirm against the
        // option definition.
        ` --budget <n> Token budget (default: 4000)`,
        ` -q, --question <q> Ask about the content`,
        ` -s, --silent No spinner output`,
        '',
    ];
    const footer = [
        ` Use ${cyan("'webpeel <command> --help'")} for command-specific options.`,
        ` Use ${cyan("'webpeel --help-all'")} for the full option reference.`,
        '',
        ` Docs: ${cyan('https://webpeel.dev/docs')}`,
        '',
    ];
    return [...banner, ...usage, ...examples, ...commands, ...commonOptions, ...footer].join('\n');
}
|
|
368
|
+
// ─── Time formatting ─────────────────────────────────────────────────────────
|
|
369
|
+
/**
 * Format how long ago `past` was, relative to `Date.now()`, using the largest
 * fitting unit: "Ns ago", "Nm ago", "Nh ago" or "Nd ago".
 *
 * Each step rounds to the nearest whole unit before deciding whether to move
 * on to the next one. A timestamp that lies in the future is reported as
 * "0s ago" instead of a negative duration.
 *
 * @param {Date} past - The moment to describe.
 * @returns {string} Relative time such as "2h ago".
 */
export function formatRelativeTime(past) {
    // Clock skew can put `past` slightly ahead of now; never print "-3s ago".
    const diffMs = Math.max(0, Date.now() - past.getTime());
    const diffSec = Math.round(diffMs / 1000);
    if (diffSec < 60) {
        return `${diffSec}s ago`;
    }
    const diffMin = Math.round(diffSec / 60);
    if (diffMin < 60) {
        return `${diffMin}m ago`;
    }
    const diffHr = Math.round(diffMin / 60);
    if (diffHr < 24) {
        return `${diffHr}h ago`;
    }
    const diffDay = Math.round(diffHr / 24);
    return `${diffDay}d ago`;
}
|
|
386
|
+
// ─── Error classification ─────────────────────────────────────────────────────
|
|
387
|
+
/**
 * Map a thrown value to one of the CLI's error code strings.
 *
 * Non-Error values are always 'FETCH_FAILED'. An Error carrying a truthy
 * `_code` property returns that value unchanged. Otherwise the error name and
 * lower-cased message are matched, in order, against timeout, blocked, DNS
 * and invalid-URL markers, falling back to 'FETCH_FAILED'.
 *
 * @param {unknown} error - Whatever was thrown.
 * @returns {string} Error code.
 */
export function classifyErrorCode(error) {
    const FALLBACK = 'FETCH_FAILED';
    if (!(error instanceof Error)) {
        return FALLBACK;
    }
    // A custom _code (set in pre-fetch validation) takes precedence over heuristics.
    if (error._code) {
        return error._code;
    }
    const text = error.message.toLowerCase();
    const kind = error.name || '';
    const mentions = (...needles) => needles.some((needle) => text.includes(needle));
    if (kind === 'TimeoutError' || mentions('timeout', 'timed out')) {
        return 'TIMEOUT';
    }
    if (kind === 'BlockedError' || mentions('blocked', '403', 'cloudflare')) {
        return 'BLOCKED';
    }
    if (mentions('enotfound', 'getaddrinfo', 'dns resolution failed', 'not found')) {
        return 'DNS_FAILED';
    }
    if (mentions('invalid url', 'invalid hostname', 'only http')) {
        return 'INVALID_URL';
    }
    return FALLBACK;
}
|
|
409
|
+
/**
 * Build a unified envelope object from a fetch result.
 *
 * All fields of `result` are copied first (backward compatibility), then the
 * canonical envelope fields are written on top: `url`, `status` (always 200),
 * `content`, `metadata` (with `result.title` as a default `title` that
 * `result.metadata.title` may override), `tokens`, `cached` and `elapsed`.
 * `structured`, `truncated` and `totalAvailable` are added only when `extra`
 * supplies a meaningful value.
 *
 * @param {object} result - Fetch result to wrap.
 * @param {{cached?: boolean, structured?: unknown, truncated?: boolean, totalAvailable?: number}} [extra]
 *   Optional envelope extras; may be omitted.
 * @returns {object} The envelope.
 */
export function buildEnvelope(result, extra = {}) {
    // Tolerate an explicit null as well as an omitted argument.
    const extras = extra ?? {};
    const envelope = {
        // Spread all result fields for backward compatibility
        ...result,
        // Required envelope fields (override result fields where they overlap)
        url: result.url,
        status: 200,
        content: result.content,
        metadata: {
            title: result.title,
            ...result.metadata,
        },
        tokens: result.tokens,
        cached: extras.cached ?? false,
        elapsed: result.elapsed,
    };
    // Optional envelope fields — only include when meaningful
    if (extras.structured !== undefined) {
        envelope.structured = extras.structured;
    }
    if (extras.truncated) {
        envelope.truncated = true;
    }
    if (extras.totalAvailable !== undefined) {
        envelope.totalAvailable = extras.totalAvailable;
    }
    return envelope;
}
|
|
440
|
+
// ─── Output result ───────────────────────────────────────────────────────────
|
|
441
|
+
/**
 * Print a fetch result according to the CLI output flags.
 *
 * Modes, checked in this order (the first that applies wins):
 *  - `options.links`:  only the result's links (JSON array or one per line)
 *  - `options.images`: only links whose URL contains a known image extension
 *  - `options.meta`:   only metadata (JSON object or labelled lines)
 *  - `options.json`:   full JSON document with guaranteed top-level fields
 *  - otherwise:        the content itself, preceded by a short header when
 *                      stdout is a TTY, with a token/timing summary on stderr
 *                      unless `options.silent` is set
 *
 * @param {object} result - Fetch result to print.
 * @param {object} options - CLI flags (`links`, `images`, `meta`, `json`, `silent`).
 * @param {{cached?: boolean, truncated?: boolean, totalAvailable?: number}} [extra]
 *   Extra envelope information.
 * @returns {Promise<void>}
 */
export async function outputResult(result, options, extra = {}) {
    // A result without a links array must still print valid output ("[]" or
    // nothing) instead of the literal "undefined" or a TypeError.
    const links = result.links ?? [];
    const printJson = (value) => writeStdout(JSON.stringify(value, null, 2) + '\n');
    const printLines = async (values) => {
        for (const value of values) {
            await writeStdout(value + '\n');
        }
    };
    // --links: output only links
    if (options.links) {
        if (options.json) {
            await printJson(links);
        }
        else {
            await printLines(links);
        }
        return;
    }
    // --images: output only image URLs
    if (options.images) {
        // Image URLs are detected among the links by extension substring.
        const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp', '.ico'];
        const imageUrls = links.filter((link) => {
            const urlLower = link.toLowerCase();
            return imageExtensions.some((ext) => urlLower.includes(ext));
        });
        if (options.json) {
            await printJson(imageUrls);
        }
        else {
            await printLines(imageUrls);
        }
        return;
    }
    // --meta: output only metadata
    if (options.meta) {
        const meta = {
            url: result.url,
            title: result.title,
            method: result.method,
            elapsed: result.elapsed,
            tokens: result.tokens,
            cached: extra.cached ?? false,
            // Spread last: page metadata may override the fields above.
            ...result.metadata,
        };
        if (options.json) {
            await printJson(meta);
        }
        else {
            console.log(`Title: ${meta.title || '(none)'}`);
            console.log(`URL: ${meta.url}`);
            const optionalLines = [
                ['Description', meta.description],
                ['Author', meta.author],
                ['Published', meta.published],
                ['Canonical', meta.canonical],
                ['OG Image', meta.image],
            ];
            for (const [label, value] of optionalLines) {
                if (value) {
                    console.log(`${label}: ${value}`);
                }
            }
            console.log(`Method: ${meta.method}`);
            console.log(`Elapsed: ${meta.elapsed}ms`);
            console.log(`Tokens: ${meta.tokens}`);
            console.log(`Cached: ${meta.cached}`);
        }
        return;
    }
    // Default: full output
    if (options.json) {
        // Build clean JSON output with guaranteed top-level fields
        const output = {
            url: result.url,
            title: result.metadata?.title || result.title || null,
            tokens: result.tokens || 0,
            fetchedAt: new Date().toISOString(),
            method: result.method || 'simple',
            elapsed: result.elapsed,
            content: result.content,
        };
        // Metadata is included without undefined/null values, and only if anything remains.
        if (result.metadata) {
            const cleanMeta = {};
            for (const [k, v] of Object.entries(result.metadata)) {
                if (v !== undefined && v !== null) {
                    cleanMeta[k] = v;
                }
            }
            if (Object.keys(cleanMeta).length > 0) {
                output.metadata = cleanMeta;
            }
        }
        if (result.links?.length) {
            output.links = result.links;
        }
        if (result.images?.length) {
            output.images = result.images;
        }
        // Remaining optional fields are copied, in this order, only when truthy.
        const optionalFields = [
            'structured',
            'domainData',
            'readability',
            'quickAnswer',
            'quality',
            'contentType',
            'chunks',
            'totalChunks',
            'warning',
            'focusQuery',
            'focusReduction',
            'extracted',
        ];
        for (const field of optionalFields) {
            if (result[field]) {
                output[field] = result[field];
            }
        }
        if (extra.cached) {
            output.cached = true;
        }
        if (extra.truncated) {
            output.truncated = true;
        }
        if (extra.totalAvailable !== undefined) {
            output.totalAvailable = extra.totalAvailable;
        }
        output._meta = { version: cliVersion, method: result.method || 'simple', timing: result.timing, serverMarkdown: result.serverMarkdown || false };
        await printJson(output);
        return;
    }
    // Smart terminal header (interactive mode only)
    const totalMs = result.timing?.total ?? result.elapsed;
    const isTerminalOutput = process.stdout.isTTY && !options.silent;
    if (isTerminalOutput) {
        const meta = result.metadata || {};
        const parts = [];
        if (meta.title || result.title) {
            parts.push(`\x1b[1m${meta.title || result.title}\x1b[0m`);
        }
        if (meta.author) {
            parts.push(`By ${meta.author}`);
        }
        if (meta.wordCount) {
            parts.push(`${meta.wordCount} words`);
        }
        if (totalMs) {
            parts.push(`${totalMs}ms`);
        }
        if (parts.length > 0) {
            await writeStdout(`\n ${parts.join(' · ')}\n`);
            await writeStdout(' ' + '─'.repeat(60) + '\n\n');
        }
    }
    // Content goes to stdout first so a consumer gets it without waiting.
    await writeStdout(result.content + '\n');
    // The timing summary goes to stderr so it doesn't pollute piped content.
    if (!options.silent) {
        process.stderr.write(`\n--- ${result.tokens} tokens · ${totalMs}ms ---\n`);
    }
}
|
|
596
|
+
// ─── Write helpers ────────────────────────────────────────────────────────────
|
|
597
|
+
/**
 * Promise wrapper around `process.stdout.write`.
 *
 * @param {string|Uint8Array} data - Chunk to write.
 * @returns {Promise<void>} Resolves when the write callback fires without an
 *   error; rejects with the error the callback reports.
 */
export function writeStdout(data) {
    return new Promise((resolve, reject) => {
        const onWritten = (err) => (err ? reject(err) : resolve());
        process.stdout.write(data, onWritten);
    });
}
|
|
607
|
+
// ─── Listings / CSV / table helpers ──────────────────────────────────────────
|
|
608
|
+
/**
 * Convert an array of listing items to CSV text.
 *
 * The columns are the union of all keys whose value is not `undefined` in at
 * least one item, in first-seen order. Every data cell is wrapped in double
 * quotes with embedded quotes doubled; `null`/`undefined` become `""`.
 * Header names are written bare, and quoted only when they contain a quote,
 * comma or line break that would otherwise corrupt the header row.
 *
 * @param {Array<object>} items - Rows to serialise.
 * @returns {string} CSV text ending in a newline, or '' when `items` is empty.
 */
export function formatListingsCsv(items) {
    if (items.length === 0) {
        return '';
    }
    // Collect all keys that carry a value somewhere.
    const keySet = new Set();
    for (const item of items) {
        for (const key of Object.keys(item)) {
            if (item[key] !== undefined) {
                keySet.add(key);
            }
        }
    }
    const keys = Array.from(keySet);
    // Doubling quotes is a no-op for strings without quotes, so one path suffices.
    const quoteCell = (value) => {
        if (value === undefined || value === null) {
            return '""';
        }
        return '"' + String(value).replace(/"/g, '""') + '"';
    };
    // Headers stay unquoted for ordinary names so existing output is unchanged.
    const headerCell = (key) => (/[",\r\n]/.test(key) ? quoteCell(key) : key);
    const lines = [keys.map(headerCell).join(',')];
    for (const item of items) {
        lines.push(keys.map((key) => quoteCell(item[key])).join(','));
    }
    return lines.join('\n') + '\n';
}
|
|
638
|
+
/**
 * Normalise an extraction result into an array of row objects suitable for
 * CSV / table rendering.
 *
 * When the object is non-empty and every value is an array, the arrays are
 * treated as columns and zipped into rows. The row count is the length of
 * the longest column, so no value is dropped when columns differ in length;
 * missing cells are `undefined`. Any other object becomes a single row.
 * Cell values are stringified; `null`/`undefined` become `undefined`.
 *
 * @param {object} extracted - Extraction result keyed by field name.
 * @returns {Array<object>} Rows keyed by the same field names.
 */
export function normaliseExtractedToRows(extracted) {
    const toCell = (value) => (value != null ? String(value) : undefined);
    const entries = Object.entries(extracted);
    const isColumnar = entries.length > 0 && entries.every(([, value]) => Array.isArray(value));
    if (!isColumnar) {
        // Treat the whole object as a single row.
        return [Object.fromEntries(entries.map(([key, value]) => [key, toCell(value)]))];
    }
    // Use the longest column: taking only the first column's length would
    // silently discard values from any longer column.
    const rowCount = Math.max(...entries.map(([, column]) => column.length));
    return Array.from({ length: rowCount }, (_, index) => Object.fromEntries(entries.map(([key, column]) => [key, toCell(column[index])])));
}
|
|
666
|
+
// ─── Branding helpers ────────────────────────────────────────────────────────
|
|
667
|
+
/**
 * Collect hex colour codes (`#rrggbb` or `#rgb`) found in a string.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Up to 10 distinct matches in first-seen order
 *   (case-sensitive de-duplication); an empty array when nothing matches.
 */
export function extractColors(content) {
    const found = content.match(/#[0-9A-Fa-f]{6}|#[0-9A-Fa-f]{3}/g);
    if (!found) {
        return [];
    }
    const distinct = Array.from(new Set(found));
    return distinct.slice(0, 10);
}
|
|
677
|
+
/**
 * Collect `font-family` values found in a string (case-insensitive match on
 * the property name).
 *
 * Each value is captured up to the next `;`, `}`, quote or newline and then
 * trimmed. A declaration that starts with a quoted family name captures only
 * whitespace; such empty results are skipped rather than returned as ''.
 *
 * @param {string} content - Text (CSS/HTML) to scan.
 * @returns {string[]} Up to 5 distinct, non-empty values in first-seen order.
 */
export function extractFonts(content) {
    const names = new Set();
    for (const [, captured] of content.matchAll(/font-family:\s*([^;}"'\n]+)/gi)) {
        const name = captured.trim();
        // The capture stops at an opening quote, which can leave only whitespace.
        if (name !== '') {
            names.add(name);
        }
    }
    return [...names].slice(0, 5);
}
|