webpeel 0.19.4 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cache.d.ts +0 -1
- package/dist/cache.js +0 -1
- package/dist/cli/commands/auth.d.ts +5 -0
- package/dist/cli/commands/auth.js +476 -0
- package/dist/cli/commands/fetch.d.ts +6 -0
- package/dist/cli/commands/fetch.js +1015 -0
- package/dist/cli/commands/interact.d.ts +5 -0
- package/dist/cli/commands/interact.js +839 -0
- package/dist/cli/commands/jobs.d.ts +5 -0
- package/dist/cli/commands/jobs.js +997 -0
- package/dist/cli/commands/screenshot.d.ts +5 -0
- package/dist/cli/commands/screenshot.js +273 -0
- package/dist/cli/commands/search.d.ts +5 -0
- package/dist/cli/commands/search.js +524 -0
- package/dist/cli/utils.d.ts +84 -0
- package/dist/cli/utils.js +686 -0
- package/dist/cli-auth.d.ts +0 -1
- package/dist/cli-auth.js +0 -1
- package/dist/cli.d.ts +7 -6
- package/dist/cli.js +35 -4698
- package/dist/core/actions.d.ts +0 -1
- package/dist/core/actions.js +0 -1
- package/dist/core/agent.d.ts +0 -1
- package/dist/core/agent.js +9 -12
- package/dist/core/answer.d.ts +0 -1
- package/dist/core/answer.js +0 -1
- package/dist/core/application-tracker.d.ts +0 -1
- package/dist/core/application-tracker.js +0 -1
- package/dist/core/apply.d.ts +0 -1
- package/dist/core/apply.js +0 -1
- package/dist/core/auto-extract.d.ts +0 -1
- package/dist/core/auto-extract.js +0 -1
- package/dist/core/auto-interact.d.ts +0 -1
- package/dist/core/auto-interact.js +0 -1
- package/dist/core/bm25-filter.d.ts +0 -1
- package/dist/core/bm25-filter.js +0 -1
- package/dist/core/branding.d.ts +0 -1
- package/dist/core/branding.js +0 -1
- package/dist/core/browser-fetch.d.ts +0 -1
- package/dist/core/browser-fetch.js +17 -10
- package/dist/core/browser-pool.d.ts +0 -1
- package/dist/core/browser-pool.js +0 -1
- package/dist/core/budget.d.ts +0 -1
- package/dist/core/budget.js +0 -1
- package/dist/core/cache.d.ts +0 -1
- package/dist/core/cache.js +0 -1
- package/dist/core/cf-worker-proxy.d.ts +0 -1
- package/dist/core/cf-worker-proxy.js +0 -1
- package/dist/core/challenge-detection.d.ts +0 -1
- package/dist/core/challenge-detection.js +0 -1
- package/dist/core/change-tracking.d.ts +0 -1
- package/dist/core/change-tracking.js +0 -1
- package/dist/core/chunker.d.ts +0 -1
- package/dist/core/chunker.js +0 -1
- package/dist/core/chunking.d.ts +0 -1
- package/dist/core/chunking.js +0 -1
- package/dist/core/cloak-fetch.d.ts +0 -1
- package/dist/core/cloak-fetch.js +0 -1
- package/dist/core/content-pruner.d.ts +0 -1
- package/dist/core/content-pruner.js +0 -1
- package/dist/core/crawl-checkpoint.d.ts +0 -1
- package/dist/core/crawl-checkpoint.js +0 -1
- package/dist/core/crawler.d.ts +0 -1
- package/dist/core/crawler.js +6 -5
- package/dist/core/cycle-fetch.d.ts +0 -1
- package/dist/core/cycle-fetch.js +0 -1
- package/dist/core/deep-fetch.d.ts +0 -1
- package/dist/core/deep-fetch.js +0 -1
- package/dist/core/design-analysis.d.ts +0 -1
- package/dist/core/design-analysis.js +0 -1
- package/dist/core/design-compare.d.ts +0 -1
- package/dist/core/design-compare.js +0 -1
- package/dist/core/diff.d.ts +0 -1
- package/dist/core/diff.js +0 -1
- package/dist/core/dns-cache.d.ts +0 -1
- package/dist/core/dns-cache.js +0 -1
- package/dist/core/documents.d.ts +0 -1
- package/dist/core/documents.js +0 -1
- package/dist/core/domain-extractors.d.ts +0 -1
- package/dist/core/domain-extractors.js +0 -1
- package/dist/core/extract-inline.d.ts +0 -1
- package/dist/core/extract-inline.js +0 -1
- package/dist/core/extract-listings.d.ts +0 -1
- package/dist/core/extract-listings.js +0 -1
- package/dist/core/extract.d.ts +0 -1
- package/dist/core/extract.js +0 -1
- package/dist/core/fetcher.d.ts +0 -1
- package/dist/core/fetcher.js +0 -1
- package/dist/core/google-cache.d.ts +0 -1
- package/dist/core/google-cache.js +0 -1
- package/dist/core/hotel-search.d.ts +0 -1
- package/dist/core/hotel-search.js +0 -1
- package/dist/core/http-fetch.d.ts +0 -1
- package/dist/core/http-fetch.js +5 -7
- package/dist/core/human.d.ts +0 -1
- package/dist/core/human.js +0 -1
- package/dist/core/jobs.d.ts +0 -1
- package/dist/core/jobs.js +0 -1
- package/dist/core/json-ld.d.ts +0 -1
- package/dist/core/json-ld.js +0 -1
- package/dist/core/llm-extract.d.ts +0 -1
- package/dist/core/llm-extract.js +0 -1
- package/dist/core/logger.d.ts +17 -0
- package/dist/core/logger.js +44 -0
- package/dist/core/map.d.ts +0 -1
- package/dist/core/map.js +0 -1
- package/dist/core/markdown.d.ts +0 -1
- package/dist/core/markdown.js +0 -1
- package/dist/core/metadata.d.ts +0 -1
- package/dist/core/metadata.js +0 -1
- package/dist/core/paginate.d.ts +0 -1
- package/dist/core/paginate.js +0 -1
- package/dist/core/pdf.d.ts +0 -1
- package/dist/core/pdf.js +0 -1
- package/dist/core/peel-tls.d.ts +0 -1
- package/dist/core/peel-tls.js +0 -1
- package/dist/core/pipeline.d.ts +0 -1
- package/dist/core/pipeline.js +22 -25
- package/dist/core/profiles.d.ts +0 -1
- package/dist/core/profiles.js +0 -1
- package/dist/core/quick-answer.d.ts +0 -1
- package/dist/core/quick-answer.js +0 -1
- package/dist/core/rate-governor.d.ts +0 -1
- package/dist/core/rate-governor.js +0 -1
- package/dist/core/readability.d.ts +0 -1
- package/dist/core/readability.js +0 -1
- package/dist/core/research.d.ts +0 -1
- package/dist/core/research.js +0 -1
- package/dist/core/schema-extraction.d.ts +0 -1
- package/dist/core/schema-extraction.js +0 -1
- package/dist/core/schema-postprocess.d.ts +0 -1
- package/dist/core/schema-postprocess.js +0 -1
- package/dist/core/schema-templates.d.ts +0 -1
- package/dist/core/schema-templates.js +0 -1
- package/dist/core/screenshot.d.ts +0 -1
- package/dist/core/screenshot.js +0 -1
- package/dist/core/search-fallback.d.ts +0 -1
- package/dist/core/search-fallback.js +0 -1
- package/dist/core/search-provider.d.ts +0 -1
- package/dist/core/search-provider.js +18 -21
- package/dist/core/site-search.d.ts +0 -1
- package/dist/core/site-search.js +0 -1
- package/dist/core/sitemap.d.ts +0 -1
- package/dist/core/sitemap.js +0 -1
- package/dist/core/stealth-patches.d.ts +0 -1
- package/dist/core/stealth-patches.js +0 -1
- package/dist/core/stemmer.d.ts +0 -1
- package/dist/core/stemmer.js +0 -1
- package/dist/core/strategies.d.ts +6 -1
- package/dist/core/strategies.js +29 -41
- package/dist/core/strategy-hooks.d.ts +0 -1
- package/dist/core/strategy-hooks.js +0 -1
- package/dist/core/summarize.d.ts +0 -1
- package/dist/core/summarize.js +0 -1
- package/dist/core/synonyms.d.ts +0 -1
- package/dist/core/synonyms.js +0 -1
- package/dist/core/table-format.d.ts +0 -1
- package/dist/core/table-format.js +0 -1
- package/dist/core/timing.d.ts +0 -1
- package/dist/core/timing.js +0 -1
- package/dist/core/user-agents.d.ts +0 -1
- package/dist/core/user-agents.js +0 -1
- package/dist/core/watch-manager.d.ts +0 -1
- package/dist/core/watch-manager.js +0 -1
- package/dist/core/watch.d.ts +0 -1
- package/dist/core/watch.js +0 -1
- package/dist/core/youtube.d.ts +0 -1
- package/dist/core/youtube.js +0 -1
- package/dist/index.d.ts +8 -3
- package/dist/index.js +27 -3
- package/dist/integrations/index.d.ts +0 -1
- package/dist/integrations/index.js +0 -1
- package/dist/integrations/langchain.d.ts +0 -1
- package/dist/integrations/langchain.js +0 -1
- package/dist/integrations/llamaindex.d.ts +0 -1
- package/dist/integrations/llamaindex.js +0 -1
- package/dist/mcp/handlers/act.d.ts +5 -0
- package/dist/mcp/handlers/act.js +34 -0
- package/dist/mcp/handlers/definitions.d.ts +6 -0
- package/dist/mcp/handlers/definitions.js +266 -0
- package/dist/mcp/handlers/extract.d.ts +6 -0
- package/dist/mcp/handlers/extract.js +102 -0
- package/dist/mcp/handlers/fetch.d.ts +6 -0
- package/dist/mcp/handlers/fetch.js +98 -0
- package/dist/mcp/handlers/find.d.ts +5 -0
- package/dist/mcp/handlers/find.js +137 -0
- package/dist/mcp/handlers/index.d.ts +13 -0
- package/dist/mcp/handlers/index.js +61 -0
- package/dist/mcp/handlers/legacy.d.ts +25 -0
- package/dist/mcp/handlers/legacy.js +450 -0
- package/dist/mcp/handlers/meta.d.ts +6 -0
- package/dist/mcp/handlers/meta.js +31 -0
- package/dist/mcp/handlers/monitor.d.ts +5 -0
- package/dist/mcp/handlers/monitor.js +41 -0
- package/dist/mcp/handlers/read.d.ts +6 -0
- package/dist/mcp/handlers/read.js +63 -0
- package/dist/mcp/handlers/see.d.ts +5 -0
- package/dist/mcp/handlers/see.js +75 -0
- package/dist/mcp/handlers/types.d.ts +29 -0
- package/dist/mcp/handlers/types.js +28 -0
- package/dist/mcp/server.d.ts +3 -4
- package/dist/mcp/server.js +35 -1101
- package/dist/mcp/smart-router.d.ts +0 -1
- package/dist/mcp/smart-router.js +3 -1
- package/dist/types.d.ts +6 -1
- package/dist/types.js +0 -1
- package/package.json +3 -13
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/cli-auth.d.ts.map +0 -1
- package/dist/cli-auth.js.map +0 -1
- package/dist/cli.bundle.cjs +0 -159248
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/core/actions.d.ts.map +0 -1
- package/dist/core/actions.js.map +0 -1
- package/dist/core/agent.d.ts.map +0 -1
- package/dist/core/agent.js.map +0 -1
- package/dist/core/answer.d.ts.map +0 -1
- package/dist/core/answer.js.map +0 -1
- package/dist/core/application-tracker.d.ts.map +0 -1
- package/dist/core/application-tracker.js.map +0 -1
- package/dist/core/apply.d.ts.map +0 -1
- package/dist/core/apply.js.map +0 -1
- package/dist/core/auto-extract.d.ts.map +0 -1
- package/dist/core/auto-extract.js.map +0 -1
- package/dist/core/auto-interact.d.ts.map +0 -1
- package/dist/core/auto-interact.js.map +0 -1
- package/dist/core/bm25-filter.d.ts.map +0 -1
- package/dist/core/bm25-filter.js.map +0 -1
- package/dist/core/branding.d.ts.map +0 -1
- package/dist/core/branding.js.map +0 -1
- package/dist/core/browser-fetch.d.ts.map +0 -1
- package/dist/core/browser-fetch.js.map +0 -1
- package/dist/core/browser-pool.d.ts.map +0 -1
- package/dist/core/browser-pool.js.map +0 -1
- package/dist/core/budget.d.ts.map +0 -1
- package/dist/core/budget.js.map +0 -1
- package/dist/core/cache.d.ts.map +0 -1
- package/dist/core/cache.js.map +0 -1
- package/dist/core/cf-worker-proxy.d.ts.map +0 -1
- package/dist/core/cf-worker-proxy.js.map +0 -1
- package/dist/core/challenge-detection.d.ts.map +0 -1
- package/dist/core/challenge-detection.js.map +0 -1
- package/dist/core/change-tracking.d.ts.map +0 -1
- package/dist/core/change-tracking.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/chunking.d.ts.map +0 -1
- package/dist/core/chunking.js.map +0 -1
- package/dist/core/cloak-fetch.d.ts.map +0 -1
- package/dist/core/cloak-fetch.js.map +0 -1
- package/dist/core/content-pruner.d.ts.map +0 -1
- package/dist/core/content-pruner.js.map +0 -1
- package/dist/core/crawl-checkpoint.d.ts.map +0 -1
- package/dist/core/crawl-checkpoint.js.map +0 -1
- package/dist/core/crawler.d.ts.map +0 -1
- package/dist/core/crawler.js.map +0 -1
- package/dist/core/cycle-fetch.d.ts.map +0 -1
- package/dist/core/cycle-fetch.js.map +0 -1
- package/dist/core/deep-fetch.d.ts.map +0 -1
- package/dist/core/deep-fetch.js.map +0 -1
- package/dist/core/design-analysis.d.ts.map +0 -1
- package/dist/core/design-analysis.js.map +0 -1
- package/dist/core/design-compare.d.ts.map +0 -1
- package/dist/core/design-compare.js.map +0 -1
- package/dist/core/diff.d.ts.map +0 -1
- package/dist/core/diff.js.map +0 -1
- package/dist/core/dns-cache.d.ts.map +0 -1
- package/dist/core/dns-cache.js.map +0 -1
- package/dist/core/documents.d.ts.map +0 -1
- package/dist/core/documents.js.map +0 -1
- package/dist/core/domain-extractors.d.ts.map +0 -1
- package/dist/core/domain-extractors.js.map +0 -1
- package/dist/core/extract-inline.d.ts.map +0 -1
- package/dist/core/extract-inline.js.map +0 -1
- package/dist/core/extract-listings.d.ts.map +0 -1
- package/dist/core/extract-listings.js.map +0 -1
- package/dist/core/extract.d.ts.map +0 -1
- package/dist/core/extract.js.map +0 -1
- package/dist/core/fetcher.d.ts.map +0 -1
- package/dist/core/fetcher.js.map +0 -1
- package/dist/core/google-cache.d.ts.map +0 -1
- package/dist/core/google-cache.js.map +0 -1
- package/dist/core/hotel-search.d.ts.map +0 -1
- package/dist/core/hotel-search.js.map +0 -1
- package/dist/core/http-fetch.d.ts.map +0 -1
- package/dist/core/http-fetch.js.map +0 -1
- package/dist/core/human.d.ts.map +0 -1
- package/dist/core/human.js.map +0 -1
- package/dist/core/jobs.d.ts.map +0 -1
- package/dist/core/jobs.js.map +0 -1
- package/dist/core/json-ld.d.ts.map +0 -1
- package/dist/core/json-ld.js.map +0 -1
- package/dist/core/llm-extract.d.ts.map +0 -1
- package/dist/core/llm-extract.js.map +0 -1
- package/dist/core/map.d.ts.map +0 -1
- package/dist/core/map.js.map +0 -1
- package/dist/core/markdown.d.ts.map +0 -1
- package/dist/core/markdown.js.map +0 -1
- package/dist/core/metadata.d.ts.map +0 -1
- package/dist/core/metadata.js.map +0 -1
- package/dist/core/paginate.d.ts.map +0 -1
- package/dist/core/paginate.js.map +0 -1
- package/dist/core/pdf.d.ts.map +0 -1
- package/dist/core/pdf.js.map +0 -1
- package/dist/core/peel-tls.d.ts.map +0 -1
- package/dist/core/peel-tls.js.map +0 -1
- package/dist/core/pipeline.d.ts.map +0 -1
- package/dist/core/pipeline.js.map +0 -1
- package/dist/core/profiles.d.ts.map +0 -1
- package/dist/core/profiles.js.map +0 -1
- package/dist/core/quick-answer.d.ts.map +0 -1
- package/dist/core/quick-answer.js.map +0 -1
- package/dist/core/rate-governor.d.ts.map +0 -1
- package/dist/core/rate-governor.js.map +0 -1
- package/dist/core/readability.d.ts.map +0 -1
- package/dist/core/readability.js.map +0 -1
- package/dist/core/research.d.ts.map +0 -1
- package/dist/core/research.js.map +0 -1
- package/dist/core/schema-extraction.d.ts.map +0 -1
- package/dist/core/schema-extraction.js.map +0 -1
- package/dist/core/schema-postprocess.d.ts.map +0 -1
- package/dist/core/schema-postprocess.js.map +0 -1
- package/dist/core/schema-templates.d.ts.map +0 -1
- package/dist/core/schema-templates.js.map +0 -1
- package/dist/core/screenshot.d.ts.map +0 -1
- package/dist/core/screenshot.js.map +0 -1
- package/dist/core/search-fallback.d.ts.map +0 -1
- package/dist/core/search-fallback.js.map +0 -1
- package/dist/core/search-provider.d.ts.map +0 -1
- package/dist/core/search-provider.js.map +0 -1
- package/dist/core/site-search.d.ts.map +0 -1
- package/dist/core/site-search.js.map +0 -1
- package/dist/core/sitemap.d.ts.map +0 -1
- package/dist/core/sitemap.js.map +0 -1
- package/dist/core/stealth-patches.d.ts.map +0 -1
- package/dist/core/stealth-patches.js.map +0 -1
- package/dist/core/stemmer.d.ts.map +0 -1
- package/dist/core/stemmer.js.map +0 -1
- package/dist/core/strategies.d.ts.map +0 -1
- package/dist/core/strategies.js.map +0 -1
- package/dist/core/strategy-hooks.d.ts.map +0 -1
- package/dist/core/strategy-hooks.js.map +0 -1
- package/dist/core/summarize.d.ts.map +0 -1
- package/dist/core/summarize.js.map +0 -1
- package/dist/core/synonyms.d.ts.map +0 -1
- package/dist/core/synonyms.js.map +0 -1
- package/dist/core/table-format.d.ts.map +0 -1
- package/dist/core/table-format.js.map +0 -1
- package/dist/core/timing.d.ts.map +0 -1
- package/dist/core/timing.js.map +0 -1
- package/dist/core/user-agents.d.ts.map +0 -1
- package/dist/core/user-agents.js.map +0 -1
- package/dist/core/watch-manager.d.ts.map +0 -1
- package/dist/core/watch-manager.js.map +0 -1
- package/dist/core/watch.d.ts.map +0 -1
- package/dist/core/watch.js.map +0 -1
- package/dist/core/youtube.d.ts.map +0 -1
- package/dist/core/youtube.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/integrations/index.d.ts.map +0 -1
- package/dist/integrations/index.js.map +0 -1
- package/dist/integrations/langchain.d.ts.map +0 -1
- package/dist/integrations/langchain.js.map +0 -1
- package/dist/integrations/llamaindex.d.ts.map +0 -1
- package/dist/integrations/llamaindex.js.map +0 -1
- package/dist/mcp/server.d.ts.map +0 -1
- package/dist/mcp/server.js.map +0 -1
- package/dist/mcp/smart-router.d.ts.map +0 -1
- package/dist/mcp/smart-router.js.map +0 -1
- package/dist/server/app.d.ts +0 -15
- package/dist/server/app.d.ts.map +0 -1
- package/dist/server/app.js +0 -350
- package/dist/server/app.js.map +0 -1
- package/dist/server/auth-store.d.ts +0 -28
- package/dist/server/auth-store.d.ts.map +0 -1
- package/dist/server/auth-store.js +0 -89
- package/dist/server/auth-store.js.map +0 -1
- package/dist/server/email-service.d.ts +0 -22
- package/dist/server/email-service.d.ts.map +0 -1
- package/dist/server/email-service.js +0 -80
- package/dist/server/email-service.js.map +0 -1
- package/dist/server/job-queue.d.ts +0 -93
- package/dist/server/job-queue.d.ts.map +0 -1
- package/dist/server/job-queue.js +0 -146
- package/dist/server/job-queue.js.map +0 -1
- package/dist/server/logger.d.ts +0 -11
- package/dist/server/logger.d.ts.map +0 -1
- package/dist/server/logger.js +0 -38
- package/dist/server/logger.js.map +0 -1
- package/dist/server/middleware/auth.d.ts +0 -29
- package/dist/server/middleware/auth.d.ts.map +0 -1
- package/dist/server/middleware/auth.js +0 -222
- package/dist/server/middleware/auth.js.map +0 -1
- package/dist/server/middleware/rate-limit.d.ts +0 -25
- package/dist/server/middleware/rate-limit.d.ts.map +0 -1
- package/dist/server/middleware/rate-limit.js +0 -168
- package/dist/server/middleware/rate-limit.js.map +0 -1
- package/dist/server/middleware/url-validator.d.ts +0 -16
- package/dist/server/middleware/url-validator.d.ts.map +0 -1
- package/dist/server/middleware/url-validator.js +0 -187
- package/dist/server/middleware/url-validator.js.map +0 -1
- package/dist/server/openapi.yaml +0 -4944
- package/dist/server/pg-auth-store.d.ts +0 -133
- package/dist/server/pg-auth-store.d.ts.map +0 -1
- package/dist/server/pg-auth-store.js +0 -473
- package/dist/server/pg-auth-store.js.map +0 -1
- package/dist/server/pg-job-queue.d.ts +0 -60
- package/dist/server/pg-job-queue.d.ts.map +0 -1
- package/dist/server/pg-job-queue.js +0 -365
- package/dist/server/pg-job-queue.js.map +0 -1
- package/dist/server/premium/domain-intel.d.ts +0 -17
- package/dist/server/premium/domain-intel.d.ts.map +0 -1
- package/dist/server/premium/domain-intel.js +0 -134
- package/dist/server/premium/domain-intel.js.map +0 -1
- package/dist/server/premium/index.d.ts +0 -18
- package/dist/server/premium/index.d.ts.map +0 -1
- package/dist/server/premium/index.js +0 -36
- package/dist/server/premium/index.js.map +0 -1
- package/dist/server/premium/swr-cache.d.ts +0 -15
- package/dist/server/premium/swr-cache.d.ts.map +0 -1
- package/dist/server/premium/swr-cache.js +0 -35
- package/dist/server/premium/swr-cache.js.map +0 -1
- package/dist/server/routes/activity.d.ts +0 -7
- package/dist/server/routes/activity.d.ts.map +0 -1
- package/dist/server/routes/activity.js +0 -68
- package/dist/server/routes/activity.js.map +0 -1
- package/dist/server/routes/agent.d.ts +0 -16
- package/dist/server/routes/agent.d.ts.map +0 -1
- package/dist/server/routes/agent.js +0 -247
- package/dist/server/routes/agent.js.map +0 -1
- package/dist/server/routes/answer.d.ts +0 -6
- package/dist/server/routes/answer.d.ts.map +0 -1
- package/dist/server/routes/answer.js +0 -133
- package/dist/server/routes/answer.js.map +0 -1
- package/dist/server/routes/ask.d.ts +0 -23
- package/dist/server/routes/ask.d.ts.map +0 -1
- package/dist/server/routes/ask.js +0 -119
- package/dist/server/routes/ask.js.map +0 -1
- package/dist/server/routes/batch.d.ts +0 -7
- package/dist/server/routes/batch.d.ts.map +0 -1
- package/dist/server/routes/batch.js +0 -412
- package/dist/server/routes/batch.js.map +0 -1
- package/dist/server/routes/cli-usage.d.ts +0 -7
- package/dist/server/routes/cli-usage.d.ts.map +0 -1
- package/dist/server/routes/cli-usage.js +0 -121
- package/dist/server/routes/cli-usage.js.map +0 -1
- package/dist/server/routes/compat.d.ts +0 -24
- package/dist/server/routes/compat.d.ts.map +0 -1
- package/dist/server/routes/compat.js +0 -653
- package/dist/server/routes/compat.js.map +0 -1
- package/dist/server/routes/deep-fetch.d.ts +0 -9
- package/dist/server/routes/deep-fetch.d.ts.map +0 -1
- package/dist/server/routes/deep-fetch.js +0 -50
- package/dist/server/routes/deep-fetch.js.map +0 -1
- package/dist/server/routes/demo.d.ts +0 -25
- package/dist/server/routes/demo.d.ts.map +0 -1
- package/dist/server/routes/demo.js +0 -434
- package/dist/server/routes/demo.js.map +0 -1
- package/dist/server/routes/extract.d.ts +0 -9
- package/dist/server/routes/extract.d.ts.map +0 -1
- package/dist/server/routes/extract.js +0 -150
- package/dist/server/routes/extract.js.map +0 -1
- package/dist/server/routes/fetch.d.ts +0 -8
- package/dist/server/routes/fetch.d.ts.map +0 -1
- package/dist/server/routes/fetch.js +0 -988
- package/dist/server/routes/fetch.js.map +0 -1
- package/dist/server/routes/health.d.ts +0 -8
- package/dist/server/routes/health.d.ts.map +0 -1
- package/dist/server/routes/health.js +0 -20
- package/dist/server/routes/health.js.map +0 -1
- package/dist/server/routes/jobs.d.ts +0 -8
- package/dist/server/routes/jobs.d.ts.map +0 -1
- package/dist/server/routes/jobs.js +0 -487
- package/dist/server/routes/jobs.js.map +0 -1
- package/dist/server/routes/mcp.d.ts +0 -18
- package/dist/server/routes/mcp.d.ts.map +0 -1
- package/dist/server/routes/mcp.js +0 -1260
- package/dist/server/routes/mcp.js.map +0 -1
- package/dist/server/routes/oauth.d.ts +0 -10
- package/dist/server/routes/oauth.d.ts.map +0 -1
- package/dist/server/routes/oauth.js +0 -334
- package/dist/server/routes/oauth.js.map +0 -1
- package/dist/server/routes/quick-answer.d.ts +0 -9
- package/dist/server/routes/quick-answer.d.ts.map +0 -1
- package/dist/server/routes/quick-answer.js +0 -93
- package/dist/server/routes/quick-answer.js.map +0 -1
- package/dist/server/routes/screenshot.d.ts +0 -23
- package/dist/server/routes/screenshot.d.ts.map +0 -1
- package/dist/server/routes/screenshot.js +0 -819
- package/dist/server/routes/screenshot.js.map +0 -1
- package/dist/server/routes/search.d.ts +0 -7
- package/dist/server/routes/search.d.ts.map +0 -1
- package/dist/server/routes/search.js +0 -312
- package/dist/server/routes/search.js.map +0 -1
- package/dist/server/routes/session.d.ts +0 -16
- package/dist/server/routes/session.d.ts.map +0 -1
- package/dist/server/routes/session.js +0 -278
- package/dist/server/routes/session.js.map +0 -1
- package/dist/server/routes/stats.d.ts +0 -7
- package/dist/server/routes/stats.d.ts.map +0 -1
- package/dist/server/routes/stats.js +0 -65
- package/dist/server/routes/stats.js.map +0 -1
- package/dist/server/routes/stripe.d.ts +0 -16
- package/dist/server/routes/stripe.d.ts.map +0 -1
- package/dist/server/routes/stripe.js +0 -283
- package/dist/server/routes/stripe.js.map +0 -1
- package/dist/server/routes/users.d.ts +0 -9
- package/dist/server/routes/users.d.ts.map +0 -1
- package/dist/server/routes/users.js +0 -1211
- package/dist/server/routes/users.js.map +0 -1
- package/dist/server/routes/watch.d.ts +0 -16
- package/dist/server/routes/watch.d.ts.map +0 -1
- package/dist/server/routes/watch.js +0 -257
- package/dist/server/routes/watch.js.map +0 -1
- package/dist/server/routes/webhooks.d.ts +0 -16
- package/dist/server/routes/webhooks.d.ts.map +0 -1
- package/dist/server/routes/webhooks.js +0 -74
- package/dist/server/routes/webhooks.js.map +0 -1
- package/dist/server/routes/youtube.d.ts +0 -7
- package/dist/server/routes/youtube.d.ts.map +0 -1
- package/dist/server/routes/youtube.js +0 -93
- package/dist/server/routes/youtube.js.map +0 -1
- package/dist/server/sentry.d.ts +0 -14
- package/dist/server/sentry.d.ts.map +0 -1
- package/dist/server/sentry.js +0 -39
- package/dist/server/sentry.js.map +0 -1
- package/dist/server/types.d.ts +0 -16
- package/dist/server/types.d.ts.map +0 -1
- package/dist/server/types.js +0 -8
- package/dist/server/types.js.map +0 -1
- package/dist/server/utils/response.d.ts +0 -45
- package/dist/server/utils/response.d.ts.map +0 -1
- package/dist/server/utils/response.js +0 -70
- package/dist/server/utils/response.js.map +0 -1
- package/dist/server/utils/sse.d.ts +0 -23
- package/dist/server/utils/sse.d.ts.map +0 -1
- package/dist/server/utils/sse.js +0 -39
- package/dist/server/utils/sse.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
import { fetch as undiciFetch } from 'undici';
|
|
16
16
|
import { load } from 'cheerio';
|
|
17
17
|
import { getStealthBrowser, getRandomUserAgent, applyStealthScripts } from './browser-pool.js';
|
|
18
|
+
import { createLogger } from './logger.js';
|
|
19
|
+
const log = createLogger('search');
|
|
18
20
|
function decodeHtmlEntities(input) {
|
|
19
21
|
// Cheerio usually decodes entities when using `.text()`, but keep this as a
|
|
20
22
|
// safety net since DuckDuckGo snippets sometimes leak encoded entities.
|
|
@@ -624,7 +626,7 @@ export class DuckDuckGoProvider {
|
|
|
624
626
|
const ddgHttpRate = providerStats.getFailureRate('ddg-http');
|
|
625
627
|
const skipDdgHttp = providerStats.shouldSkip('ddg-http');
|
|
626
628
|
if (skipDdgHttp) {
|
|
627
|
-
|
|
629
|
+
log.debug(`DDG HTTP skipped (failure rate ${Math.round(ddgHttpRate * 100)}% ≥ 80%)`);
|
|
628
630
|
}
|
|
629
631
|
else {
|
|
630
632
|
const ddgTimeoutMs = ddgHttpRate > 0.5 ? 2_000 : 8_000;
|
|
@@ -636,7 +638,7 @@ export class DuckDuckGoProvider {
|
|
|
636
638
|
const results = await this.searchOnce(q, ddgOptions);
|
|
637
639
|
if (results.length > 0) {
|
|
638
640
|
providerStats.record('ddg-http', true);
|
|
639
|
-
|
|
641
|
+
log.debug(`source=ddg-http returned ${results.length} results` +
|
|
640
642
|
(ddgTimeoutMs < 8_000 ? ` (fast-timeout ${ddgTimeoutMs}ms)` : ''));
|
|
641
643
|
return results;
|
|
642
644
|
}
|
|
@@ -644,7 +646,7 @@ export class DuckDuckGoProvider {
|
|
|
644
646
|
}
|
|
645
647
|
catch (e) {
|
|
646
648
|
const msg = e instanceof Error ? e.message : String(e);
|
|
647
|
-
|
|
649
|
+
log.debug('DDG HTTP failed:', msg);
|
|
648
650
|
break;
|
|
649
651
|
}
|
|
650
652
|
}
|
|
@@ -659,26 +661,26 @@ export class DuckDuckGoProvider {
|
|
|
659
661
|
const ddgLiteRate = providerStats.getFailureRate('ddg-lite');
|
|
660
662
|
const skipDdgLite = providerStats.shouldSkip('ddg-lite');
|
|
661
663
|
if (skipDdgLite) {
|
|
662
|
-
|
|
664
|
+
log.debug(`DDG Lite skipped (failure rate ${Math.round(ddgLiteRate * 100)}% ≥ 80%)`);
|
|
663
665
|
}
|
|
664
666
|
else {
|
|
665
|
-
|
|
667
|
+
log.debug('DDG returned 0 results, trying DDG Lite...');
|
|
666
668
|
const liteTimeoutMs = ddgLiteRate > 0.5 ? 2_000 : 8_000;
|
|
667
669
|
const liteSignal = createTimeoutSignal(liteTimeoutMs, options.signal);
|
|
668
670
|
try {
|
|
669
671
|
const liteResults = await this.searchLite(query, { ...options, signal: liteSignal });
|
|
670
672
|
if (liteResults.length > 0) {
|
|
671
673
|
providerStats.record('ddg-lite', true);
|
|
672
|
-
|
|
674
|
+
log.debug(`source=ddg-lite returned ${liteResults.length} results` +
|
|
673
675
|
(liteTimeoutMs < 8_000 ? ` (fast-timeout ${liteTimeoutMs}ms)` : ''));
|
|
674
676
|
return liteResults;
|
|
675
677
|
}
|
|
676
678
|
providerStats.record('ddg-lite', false);
|
|
677
|
-
|
|
679
|
+
log.debug('DDG Lite also returned 0 results');
|
|
678
680
|
}
|
|
679
681
|
catch (e) {
|
|
680
682
|
providerStats.record('ddg-lite', false);
|
|
681
|
-
|
|
683
|
+
log.debug('DDG Lite failed:', e instanceof Error ? e.message : e);
|
|
682
684
|
}
|
|
683
685
|
}
|
|
684
686
|
// -----------------------------------------------------------
|
|
@@ -690,12 +692,12 @@ export class DuckDuckGoProvider {
|
|
|
690
692
|
const braveProvider = new BraveSearchProvider();
|
|
691
693
|
const braveResults = await braveProvider.searchWeb(query, { ...options, apiKey: braveKey });
|
|
692
694
|
if (braveResults.length > 0) {
|
|
693
|
-
|
|
695
|
+
log.debug(`source=brave returned ${braveResults.length} results`);
|
|
694
696
|
return braveResults;
|
|
695
697
|
}
|
|
696
698
|
}
|
|
697
699
|
catch (e) {
|
|
698
|
-
|
|
700
|
+
log.debug('Brave search failed:', e instanceof Error ? e.message : e);
|
|
699
701
|
}
|
|
700
702
|
}
|
|
701
703
|
// -----------------------------------------------------------
|
|
@@ -703,18 +705,18 @@ export class DuckDuckGoProvider {
|
|
|
703
705
|
// Bypasses bot-detection on datacenter IPs. This is the reliable
|
|
704
706
|
// last resort — but it spins up a browser so it takes a few seconds.
|
|
705
707
|
// -----------------------------------------------------------
|
|
706
|
-
|
|
708
|
+
log.debug('Trying stealth browser search (DDG + Bing + Ecosia)...');
|
|
707
709
|
try {
|
|
708
710
|
const stealthProvider = new StealthSearchProvider();
|
|
709
711
|
const stealthResults = await stealthProvider.searchWeb(query, options);
|
|
710
712
|
if (stealthResults.length > 0) {
|
|
711
|
-
|
|
713
|
+
log.debug(`source=stealth returned ${stealthResults.length} results`);
|
|
712
714
|
return stealthResults;
|
|
713
715
|
}
|
|
714
|
-
|
|
716
|
+
log.debug('Stealth search returned 0 results');
|
|
715
717
|
}
|
|
716
718
|
catch (e) {
|
|
717
|
-
|
|
719
|
+
log.debug('Stealth search failed:', e instanceof Error ? e.message : e);
|
|
718
720
|
}
|
|
719
721
|
return [];
|
|
720
722
|
}
|
|
@@ -891,9 +893,7 @@ export class GoogleSearchProvider {
|
|
|
891
893
|
}
|
|
892
894
|
}
|
|
893
895
|
catch (e) {
|
|
894
|
-
|
|
895
|
-
console.debug('[webpeel] Google stealth (peel) error:', e.message);
|
|
896
|
-
}
|
|
896
|
+
log.debug('Google stealth (peel) error:', e.message);
|
|
897
897
|
}
|
|
898
898
|
// Strategy B: direct playwright-extra + stealth plugin
|
|
899
899
|
let browser;
|
|
@@ -933,9 +933,7 @@ export class GoogleSearchProvider {
|
|
|
933
933
|
return this._parseGoogleHtml(html, count);
|
|
934
934
|
}
|
|
935
935
|
catch (e) {
|
|
936
|
-
|
|
937
|
-
console.debug('[webpeel] Google stealth (playwright) error:', e.message);
|
|
938
|
-
}
|
|
936
|
+
log.debug('Google stealth (playwright) error:', e.message);
|
|
939
937
|
return [];
|
|
940
938
|
}
|
|
941
939
|
finally {
|
|
@@ -1057,4 +1055,3 @@ export function getBestSearchProvider() {
|
|
|
1057
1055
|
// (DDG HTTP → DDG Lite → stealth multi-engine (Bing + Ecosia))
|
|
1058
1056
|
return { provider: new DuckDuckGoProvider() };
|
|
1059
1057
|
}
|
|
1060
|
-
//# sourceMappingURL=search-provider.js.map
|
package/dist/core/site-search.js
CHANGED
package/dist/core/sitemap.d.ts
CHANGED
package/dist/core/sitemap.js
CHANGED
|
@@ -55,4 +55,3 @@ export declare function applyStealthPatches(page: Page): Promise<void>;
|
|
|
55
55
|
* @param locale - BCP 47 locale string, e.g. 'en-US' (default).
|
|
56
56
|
*/
|
|
57
57
|
export declare function applyAcceptLanguageHeader(page: Page, locale?: string): Promise<void>;
|
|
58
|
-
//# sourceMappingURL=stealth-patches.d.ts.map
|
package/dist/core/stemmer.d.ts
CHANGED
package/dist/core/stemmer.js
CHANGED
|
@@ -73,6 +73,12 @@ export interface StrategyOptions {
|
|
|
73
73
|
cycle?: boolean;
|
|
74
74
|
/** Use PeelTLS TLS fingerprint spoofing */
|
|
75
75
|
tls?: boolean;
|
|
76
|
+
/**
|
|
77
|
+
* Skip browser escalation on thin/shell content.
|
|
78
|
+
* When true, the simple HTTP result is returned as-is without escalating to browser.
|
|
79
|
+
* Use for Q&A/search workloads where speed matters more than JS-rendered content.
|
|
80
|
+
*/
|
|
81
|
+
noEscalate?: boolean;
|
|
76
82
|
}
|
|
77
83
|
/**
|
|
78
84
|
* Smart fetch with automatic escalation.
|
|
@@ -85,4 +91,3 @@ export declare function smartFetch(url: string, options?: StrategyOptions): Prom
|
|
|
85
91
|
* @deprecated Use `clearStrategyHooks()` from strategy-hooks.ts instead.
|
|
86
92
|
*/
|
|
87
93
|
export { clearStrategyHooks as clearDomainIntel } from './strategy-hooks.js';
|
|
88
|
-
//# sourceMappingURL=strategies.d.ts.map
|
package/dist/core/strategies.js
CHANGED
|
@@ -12,6 +12,8 @@ import { resolveAndCache } from './dns-cache.js';
|
|
|
12
12
|
import { BlockedError, NetworkError } from '../types.js';
|
|
13
13
|
import { detectChallenge } from './challenge-detection.js';
|
|
14
14
|
import { getStrategyHooks, } from './strategy-hooks.js';
|
|
15
|
+
import { createLogger } from './logger.js';
|
|
16
|
+
const log = createLogger('fetch');
|
|
15
17
|
/* ---------- hardcoded domain rules -------------------------------------- */
|
|
16
18
|
function shouldForceBrowser(url) {
|
|
17
19
|
// Hashbang URLs (#!) are always JS-routed SPAs — browser rendering required
|
|
@@ -82,8 +84,7 @@ function shouldForceBrowser(url) {
|
|
|
82
84
|
}
|
|
83
85
|
catch (e) {
|
|
84
86
|
// Ignore URL parsing errors; validation happens inside fetchers.
|
|
85
|
-
|
|
86
|
-
console.debug('[webpeel]', 'stealth domain URL parse failed:', e instanceof Error ? e.message : e);
|
|
87
|
+
log.debug('stealth domain URL parse failed:', e instanceof Error ? e.message : e);
|
|
87
88
|
}
|
|
88
89
|
return null;
|
|
89
90
|
}
|
|
@@ -190,8 +191,7 @@ function prefetchDns(url) {
|
|
|
190
191
|
}
|
|
191
192
|
catch (e) {
|
|
192
193
|
// Ignore invalid URL.
|
|
193
|
-
|
|
194
|
-
console.debug('[webpeel]', 'DNS prefetch URL parse failed:', e instanceof Error ? e.message : e);
|
|
194
|
+
log.debug('DNS prefetch URL parse failed:', e instanceof Error ? e.message : e);
|
|
195
195
|
}
|
|
196
196
|
}
|
|
197
197
|
async function fetchWithBrowserStrategy(url, options) {
|
|
@@ -308,7 +308,7 @@ async function fetchWithBrowserStrategy(url, options) {
|
|
|
308
308
|
* With premium hooks: SWR cache → domain intel → parallel race → escalation.
|
|
309
309
|
*/
|
|
310
310
|
export async function smartFetch(url, options = {}) {
|
|
311
|
-
const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, actions, keepPageOpen = false, noCache = false, raceTimeoutMs = 2000, profileDir, headed = false, storageState, proxy, proxies, device, viewportWidth, viewportHeight, waitUntil, waitSelector, blockResources, cloaked = false, cycle = false, tls = false, } = options;
|
|
311
|
+
const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, actions, keepPageOpen = false, noCache = false, raceTimeoutMs = 2000, profileDir, headed = false, storageState, proxy, proxies, device, viewportWidth, viewportHeight, waitUntil, waitSelector, blockResources, cloaked = false, cycle = false, tls = false, noEscalate = false, } = options;
|
|
312
312
|
const usePeelTLS = tls || cycle;
|
|
313
313
|
// Build effective proxy list: explicit proxies array, or single proxy, or empty
|
|
314
314
|
const effectiveProxies = proxies?.length ? proxies :
|
|
@@ -351,12 +351,12 @@ export async function smartFetch(url, options = {}) {
|
|
|
351
351
|
/* ---- CloakBrowser direct path (if explicitly requested) -------------- */
|
|
352
352
|
if (cloaked) {
|
|
353
353
|
try {
|
|
354
|
+
// @ts-ignore — proprietary module, gitignored
|
|
354
355
|
const { cloakFetch, isCloakBrowserAvailable } = await import('./cloak-fetch.js');
|
|
355
356
|
if (!isCloakBrowserAvailable()) {
|
|
356
357
|
throw new Error('CloakBrowser not installed. Run: npm install cloakbrowser playwright-core');
|
|
357
358
|
}
|
|
358
|
-
|
|
359
|
-
console.debug('[webpeel]', 'Using CloakBrowser stealth (explicitly requested)');
|
|
359
|
+
log.debug('Using CloakBrowser stealth (explicitly requested)');
|
|
360
360
|
const result = await cloakFetch({
|
|
361
361
|
url,
|
|
362
362
|
proxy: effectiveProxies[0],
|
|
@@ -392,8 +392,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
392
392
|
if (!isPeelTLSAvailable()) {
|
|
393
393
|
throw new Error('PeelTLS binary not found. Build it with: cd peeltls && bash build.sh');
|
|
394
394
|
}
|
|
395
|
-
|
|
396
|
-
console.debug('[webpeel]', 'Using PeelTLS fingerprint spoofing (explicitly requested)');
|
|
395
|
+
log.debug('Using PeelTLS fingerprint spoofing (explicitly requested)');
|
|
397
396
|
const result = await peelTLSFetch(url, {
|
|
398
397
|
proxy: firstProxy,
|
|
399
398
|
headers,
|
|
@@ -427,8 +426,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
427
426
|
}
|
|
428
427
|
catch (e) {
|
|
429
428
|
// Non-fatal: background revalidation failed, stale entry continues serving.
|
|
430
|
-
|
|
431
|
-
console.debug('[webpeel]', 'background cache revalidation failed:', e instanceof Error ? e.message : e);
|
|
429
|
+
log.debug('background cache revalidation failed:', e instanceof Error ? e.message : e);
|
|
432
430
|
}
|
|
433
431
|
})();
|
|
434
432
|
}
|
|
@@ -500,14 +498,15 @@ export async function smartFetch(url, options = {}) {
|
|
|
500
498
|
if (raceTimer)
|
|
501
499
|
clearTimeout(raceTimer);
|
|
502
500
|
if (simpleOrTimeout.type === 'simple-success') {
|
|
503
|
-
//
|
|
504
|
-
if (shouldEscalateForLowContent(simpleOrTimeout.result) || hasSpaIndicators(simpleOrTimeout.result.html)) {
|
|
501
|
+
// Skip escalation when noEscalate=true (Q&A workloads that prefer speed over JS rendering)
|
|
502
|
+
if (!noEscalate && (shouldEscalateForLowContent(simpleOrTimeout.result) || hasSpaIndicators(simpleOrTimeout.result.html))) {
|
|
505
503
|
shouldUseBrowser = true;
|
|
506
504
|
}
|
|
507
505
|
else {
|
|
508
506
|
// Check whether the response is a bot-challenge page (e.g. Cloudflare, PerimeterX)
|
|
509
|
-
|
|
510
|
-
|
|
507
|
+
// Skip challenge detection when noEscalate=true (can't fix it with browser anyway)
|
|
508
|
+
const challengeCheck = noEscalate ? null : detectChallenge(simpleOrTimeout.result.html, simpleOrTimeout.result.statusCode);
|
|
509
|
+
if (challengeCheck && challengeCheck.isChallenge && challengeCheck.confidence >= 0.7) {
|
|
511
510
|
// Escalate — the browser/stealth path will handle it below
|
|
512
511
|
shouldUseBrowser = true;
|
|
513
512
|
}
|
|
@@ -525,7 +524,8 @@ export async function smartFetch(url, options = {}) {
|
|
|
525
524
|
}
|
|
526
525
|
}
|
|
527
526
|
if (simpleOrTimeout.type === 'simple-error') {
|
|
528
|
-
|
|
527
|
+
// When noEscalate=true, don't try browser on simple fetch error — just throw
|
|
528
|
+
if (noEscalate || !shouldEscalateSimpleError(simpleOrTimeout.error)) {
|
|
529
529
|
throw simpleOrTimeout.error;
|
|
530
530
|
}
|
|
531
531
|
shouldUseBrowser = true;
|
|
@@ -578,8 +578,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
578
578
|
}
|
|
579
579
|
catch (e) {
|
|
580
580
|
// Race resolution failed — determine which error to propagate
|
|
581
|
-
|
|
582
|
-
console.debug('[webpeel]', 'fetch race resolution failed:', e instanceof Error ? e.message : e);
|
|
581
|
+
log.debug('fetch race resolution failed:', e instanceof Error ? e.message : e);
|
|
583
582
|
if (simpleError &&
|
|
584
583
|
!shouldEscalateSimpleError(simpleError) &&
|
|
585
584
|
!isAbortError(simpleError)) {
|
|
@@ -694,8 +693,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
694
693
|
try {
|
|
695
694
|
const { peelTLSFetch, isPeelTLSAvailable } = await import('./peel-tls.js');
|
|
696
695
|
if (isPeelTLSAvailable()) {
|
|
697
|
-
|
|
698
|
-
console.debug('[webpeel]', 'Escalating to PeelTLS fingerprint spoofing');
|
|
696
|
+
log.debug('Escalating to PeelTLS fingerprint spoofing');
|
|
699
697
|
const peelResult = await peelTLSFetch(url, {
|
|
700
698
|
proxy: currentProxy,
|
|
701
699
|
headers,
|
|
@@ -712,13 +710,11 @@ export async function smartFetch(url, options = {}) {
|
|
|
712
710
|
return peelStrategyResult;
|
|
713
711
|
}
|
|
714
712
|
// PeelTLS still challenged — fall through to CloakBrowser
|
|
715
|
-
|
|
716
|
-
console.debug('[webpeel]', 'PeelTLS still challenged, escalating to CloakBrowser');
|
|
713
|
+
log.debug('PeelTLS still challenged, escalating to CloakBrowser');
|
|
717
714
|
}
|
|
718
715
|
}
|
|
719
716
|
catch (peelError) {
|
|
720
|
-
|
|
721
|
-
console.debug('[webpeel]', 'PeelTLS failed:', peelError instanceof Error ? peelError.message : peelError);
|
|
717
|
+
log.debug('PeelTLS failed:', peelError instanceof Error ? peelError.message : peelError);
|
|
722
718
|
// Fall through to CloakBrowser
|
|
723
719
|
}
|
|
724
720
|
}
|
|
@@ -727,8 +723,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
727
723
|
try {
|
|
728
724
|
const { cfWorkerFetch, isCfWorkerAvailable } = await import('./cf-worker-proxy.js');
|
|
729
725
|
if (isCfWorkerAvailable()) {
|
|
730
|
-
|
|
731
|
-
console.debug('[webpeel]', 'Escalating to CF Worker proxy');
|
|
726
|
+
log.debug('Escalating to CF Worker proxy');
|
|
732
727
|
const cfResult = await cfWorkerFetch(url, {
|
|
733
728
|
headers,
|
|
734
729
|
timeout: timeoutMs,
|
|
@@ -743,22 +738,20 @@ export async function smartFetch(url, options = {}) {
|
|
|
743
738
|
recordMethod('cf-worker');
|
|
744
739
|
return cfStrategyResult;
|
|
745
740
|
}
|
|
746
|
-
|
|
747
|
-
console.debug('[webpeel]', 'CF Worker still challenged, escalating to CloakBrowser');
|
|
741
|
+
log.debug('CF Worker still challenged, escalating to CloakBrowser');
|
|
748
742
|
}
|
|
749
743
|
}
|
|
750
744
|
catch (cfError) {
|
|
751
|
-
|
|
752
|
-
console.debug('[webpeel]', 'CF Worker proxy failed:', cfError instanceof Error ? cfError.message : cfError);
|
|
745
|
+
log.debug('CF Worker proxy failed:', cfError instanceof Error ? cfError.message : cfError);
|
|
753
746
|
}
|
|
754
747
|
}
|
|
755
748
|
// If still challenged after CF Worker, try CloakBrowser
|
|
756
749
|
if (finalResult.challengeDetected) {
|
|
757
750
|
try {
|
|
751
|
+
// @ts-ignore — proprietary module, gitignored
|
|
758
752
|
const { cloakFetch, isCloakBrowserAvailable } = await import('./cloak-fetch.js');
|
|
759
753
|
if (isCloakBrowserAvailable()) {
|
|
760
|
-
|
|
761
|
-
console.debug('[webpeel]', 'Escalating to CloakBrowser stealth');
|
|
754
|
+
log.debug('Escalating to CloakBrowser stealth');
|
|
762
755
|
const cloakResult = await cloakFetch({
|
|
763
756
|
url,
|
|
764
757
|
proxy: currentProxy,
|
|
@@ -783,8 +776,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
783
776
|
}
|
|
784
777
|
}
|
|
785
778
|
catch (cloakError) {
|
|
786
|
-
|
|
787
|
-
console.debug('[webpeel]', 'CloakBrowser failed:', cloakError instanceof Error ? cloakError.message : cloakError);
|
|
779
|
+
log.debug('CloakBrowser failed:', cloakError instanceof Error ? cloakError.message : cloakError);
|
|
788
780
|
// Fall through to Google Cache fallback
|
|
789
781
|
}
|
|
790
782
|
}
|
|
@@ -794,8 +786,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
794
786
|
const { fetchGoogleCache } = await import('./google-cache.js');
|
|
795
787
|
const cacheResult = await fetchGoogleCache(url, { timeout: timeoutMs });
|
|
796
788
|
if (cacheResult && cacheResult.html.length > 200) {
|
|
797
|
-
|
|
798
|
-
console.debug('[webpeel]', 'Using Google Cache fallback');
|
|
789
|
+
log.debug('Using Google Cache fallback');
|
|
799
790
|
const cacheStrategyResult = {
|
|
800
791
|
html: cacheResult.html,
|
|
801
792
|
url: cacheResult.url,
|
|
@@ -807,8 +798,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
807
798
|
}
|
|
808
799
|
}
|
|
809
800
|
catch (cacheError) {
|
|
810
|
-
|
|
811
|
-
console.debug('[webpeel]', 'Google Cache failed:', cacheError);
|
|
801
|
+
log.debug('Google Cache failed:', cacheError);
|
|
812
802
|
}
|
|
813
803
|
}
|
|
814
804
|
// Success (or gave up with challengeDetected=true on the last proxy)
|
|
@@ -823,8 +813,7 @@ export async function smartFetch(url, options = {}) {
|
|
|
823
813
|
if (isAbortError(e))
|
|
824
814
|
throw e; // Don't retry on abort
|
|
825
815
|
// Log and try next proxy
|
|
826
|
-
|
|
827
|
-
console.debug('[webpeel]', `proxy ${currentProxy || 'direct'} failed:`, e instanceof Error ? e.message : e);
|
|
816
|
+
log.debug(`proxy ${currentProxy || 'direct'} failed:`, e instanceof Error ? e.message : e);
|
|
828
817
|
// If last proxy, throw below; otherwise continue loop
|
|
829
818
|
}
|
|
830
819
|
}
|
|
@@ -836,4 +825,3 @@ export async function smartFetch(url, options = {}) {
|
|
|
836
825
|
* @deprecated Use `clearStrategyHooks()` from strategy-hooks.ts instead.
|
|
837
826
|
*/
|
|
838
827
|
export { clearStrategyHooks as clearDomainIntel } from './strategy-hooks.js';
|
|
839
|
-
//# sourceMappingURL=strategies.js.map
|
package/dist/core/summarize.d.ts
CHANGED
package/dist/core/summarize.js
CHANGED
package/dist/core/synonyms.d.ts
CHANGED
package/dist/core/synonyms.js
CHANGED
package/dist/core/timing.d.ts
CHANGED
package/dist/core/timing.js
CHANGED
package/dist/core/user-agents.js
CHANGED
package/dist/core/watch.d.ts
CHANGED
package/dist/core/watch.js
CHANGED
package/dist/core/youtube.d.ts
CHANGED
package/dist/core/youtube.js
CHANGED
|
@@ -753,4 +753,3 @@ function extractMetaTag(html, property) {
|
|
|
753
753
|
const m = html.match(regex) ?? html.match(new RegExp(`<meta[^>]+content=["']([^"']+)["'][^>]+(?:property|name)=["']${property.replace(/:/g, '\\:')}["']`, 'i'));
|
|
754
754
|
return m ? decodeHtmlEntities(m[1]) : null;
|
|
755
755
|
}
|
|
756
|
-
//# sourceMappingURL=youtube.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -35,7 +35,12 @@ export { quickAnswer, type QuickAnswerOptions, type QuickAnswerResult } from './
|
|
|
35
35
|
export { extractValueFromPassage, smartExtractSchemaFields } from './core/schema-postprocess.js';
|
|
36
36
|
export { Timer, type PipelineTiming } from './core/timing.js';
|
|
37
37
|
export { chunkContent, type ChunkOptions, type ContentChunk, type ChunkResult } from './core/chunker.js';
|
|
38
|
-
export
|
|
38
|
+
export type SearchFallbackResult = {
|
|
39
|
+
content: string;
|
|
40
|
+
url: string;
|
|
41
|
+
method: string;
|
|
42
|
+
};
|
|
43
|
+
export declare function searchFallback(..._args: any[]): Promise<SearchFallbackResult | null>;
|
|
39
44
|
export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS, type PeelTLSOptions, type PeelTLSResult } from './core/peel-tls.js';
|
|
40
45
|
/**
|
|
41
46
|
* Fetch and extract content from a URL
|
|
@@ -87,7 +92,8 @@ export { humanDelay, humanMouseMove, humanRead, warmupBrowse, humanType, humanCl
|
|
|
87
92
|
export { SCHEMA_TEMPLATES, getSchemaTemplate, listSchemaTemplates, type SchemaTemplate } from './core/schema-templates.js';
|
|
88
93
|
export { WebPeelLoader, type WebPeelLoaderOptions } from './integrations/langchain.js';
|
|
89
94
|
export { WebPeelReader, type WebPeelReaderOptions } from './integrations/llamaindex.js';
|
|
90
|
-
export
|
|
95
|
+
export declare function applyStealthPatches(page: any): Promise<void>;
|
|
96
|
+
export declare function applyAcceptLanguageHeader(page: any, lang?: string): Promise<void>;
|
|
91
97
|
export { fetchGoogleCache, isGoogleCacheAvailable, type GoogleCacheResult } from './core/google-cache.js';
|
|
92
98
|
export { cfWorkerFetch, isCfWorkerAvailable, type CfWorkerProxyOptions, type CfWorkerProxyResult } from './core/cf-worker-proxy.js';
|
|
93
99
|
/**
|
|
@@ -117,4 +123,3 @@ export declare class WebPeel {
|
|
|
117
123
|
/** Extract structured data */
|
|
118
124
|
extract(url: string, _schema: Record<string, unknown>, options?: PeelOptions): Promise<unknown>;
|
|
119
125
|
}
|
|
120
|
-
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -36,7 +36,16 @@ export { quickAnswer } from './core/quick-answer.js';
|
|
|
36
36
|
export { extractValueFromPassage, smartExtractSchemaFields } from './core/schema-postprocess.js';
|
|
37
37
|
export { Timer } from './core/timing.js';
|
|
38
38
|
export { chunkContent } from './core/chunker.js';
|
|
39
|
-
export
|
|
39
|
+
export async function searchFallback(..._args) {
|
|
40
|
+
// @ts-ignore — proprietary module, gitignored
|
|
41
|
+
try {
|
|
42
|
+
const m = await import('./core/search-fallback.js');
|
|
43
|
+
return m.searchFallback(..._args);
|
|
44
|
+
}
|
|
45
|
+
catch {
|
|
46
|
+
return null;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
40
49
|
export { peelTLSFetch, isPeelTLSAvailable, shutdownPeelTLS } from './core/peel-tls.js';
|
|
41
50
|
/**
|
|
42
51
|
* Fetch and extract content from a URL
|
|
@@ -132,7 +141,23 @@ export { WebPeelLoader } from './integrations/langchain.js';
|
|
|
132
141
|
export { WebPeelReader } from './integrations/llamaindex.js';
|
|
133
142
|
// Advanced stealth utilities — for power users who want to apply extra evasions
|
|
134
143
|
// to their own Playwright pages.
|
|
135
|
-
|
|
144
|
+
// stealth-patches: proprietary module, loaded at runtime only
|
|
145
|
+
export async function applyStealthPatches(page) {
|
|
146
|
+
// @ts-ignore — proprietary module, gitignored
|
|
147
|
+
try {
|
|
148
|
+
const m = await import('./core/stealth-patches.js');
|
|
149
|
+
await m.applyStealthPatches(page);
|
|
150
|
+
}
|
|
151
|
+
catch { /* not available */ }
|
|
152
|
+
}
|
|
153
|
+
export async function applyAcceptLanguageHeader(page, lang) {
|
|
154
|
+
// @ts-ignore — proprietary module, gitignored
|
|
155
|
+
try {
|
|
156
|
+
const m = await import('./core/stealth-patches.js');
|
|
157
|
+
await m.applyAcceptLanguageHeader(page, lang);
|
|
158
|
+
}
|
|
159
|
+
catch { /* not available */ }
|
|
160
|
+
}
|
|
136
161
|
// Google Cache fallback — fetch cached copies of blocked pages
|
|
137
162
|
export { fetchGoogleCache, isGoogleCacheAvailable } from './core/google-cache.js';
|
|
138
163
|
export { cfWorkerFetch, isCfWorkerAvailable } from './core/cf-worker-proxy.js';
|
|
@@ -181,4 +206,3 @@ export class WebPeel {
|
|
|
181
206
|
return result;
|
|
182
207
|
}
|
|
183
208
|
}
|
|
184
|
-
//# sourceMappingURL=index.js.map
|