@staticn0va/wigolo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +195 -73
- package/SKILL.md +382 -0
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/executor.d.ts +33 -0
- package/dist/agent/executor.d.ts.map +1 -0
- package/dist/agent/executor.js +233 -0
- package/dist/agent/executor.js.map +1 -0
- package/dist/agent/pipeline.d.ts +5 -0
- package/dist/agent/pipeline.d.ts.map +1 -0
- package/dist/agent/pipeline.js +208 -0
- package/dist/agent/pipeline.js.map +1 -0
- package/dist/agent/planner.d.ts +13 -0
- package/dist/agent/planner.d.ts.map +1 -0
- package/dist/agent/planner.js +271 -0
- package/dist/agent/planner.js.map +1 -0
- package/dist/agent/relevance.d.ts +15 -0
- package/dist/agent/relevance.d.ts.map +1 -0
- package/dist/agent/relevance.js +60 -0
- package/dist/agent/relevance.js.map +1 -0
- package/dist/cache/backfill-embeddings.d.ts +23 -0
- package/dist/cache/backfill-embeddings.d.ts.map +1 -0
- package/dist/cache/backfill-embeddings.js +105 -0
- package/dist/cache/backfill-embeddings.js.map +1 -0
- package/dist/cache/change-detector.d.ts +7 -0
- package/dist/cache/change-detector.d.ts.map +1 -0
- package/dist/cache/change-detector.js +43 -0
- package/dist/cache/change-detector.js.map +1 -0
- package/dist/cache/db.d.ts +1 -0
- package/dist/cache/db.d.ts.map +1 -1
- package/dist/cache/db.js +94 -22
- package/dist/cache/db.js.map +1 -1
- package/dist/cache/diff-summary.d.ts +2 -0
- package/dist/cache/diff-summary.d.ts.map +1 -0
- package/dist/cache/diff-summary.js +82 -0
- package/dist/cache/diff-summary.js.map +1 -0
- package/dist/cache/migrations/runner.d.ts +29 -0
- package/dist/cache/migrations/runner.d.ts.map +1 -0
- package/dist/cache/migrations/runner.js +147 -0
- package/dist/cache/migrations/runner.js.map +1 -0
- package/dist/cache/sqlite-vec-store.d.ts +42 -0
- package/dist/cache/sqlite-vec-store.d.ts.map +1 -0
- package/dist/cache/sqlite-vec-store.js +176 -0
- package/dist/cache/sqlite-vec-store.js.map +1 -0
- package/dist/cache/store.d.ts +46 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +362 -168
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +49 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +111 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +58 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +27 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +136 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +62 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/auth.d.ts +2 -0
- package/dist/cli/auth.d.ts.map +1 -0
- package/dist/cli/auth.js +94 -0
- package/dist/cli/auth.js.map +1 -0
- package/dist/cli/backfill.d.ts +2 -0
- package/dist/cli/backfill.d.ts.map +1 -0
- package/dist/cli/backfill.js +58 -0
- package/dist/cli/backfill.js.map +1 -0
- package/dist/cli/daemon.d.ts +6 -1
- package/dist/cli/daemon.d.ts.map +1 -1
- package/dist/cli/daemon.js +61 -3
- package/dist/cli/daemon.js.map +1 -1
- package/dist/cli/doctor.d.ts +8 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +318 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/health.d.ts +1 -1
- package/dist/cli/health.d.ts.map +1 -1
- package/dist/cli/health.js +42 -3
- package/dist/cli/health.js.map +1 -1
- package/dist/cli/help.d.ts +6 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +63 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +35 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +201 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/plugin.d.ts +5 -0
- package/dist/cli/plugin.d.ts.map +1 -0
- package/dist/cli/plugin.js +185 -0
- package/dist/cli/plugin.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +114 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/shell.d.ts +2 -0
- package/dist/cli/shell.d.ts.map +1 -0
- package/dist/cli/shell.js +86 -0
- package/dist/cli/shell.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +31 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/telemetry.d.ts +10 -0
- package/dist/cli/telemetry.d.ts.map +1 -0
- package/dist/cli/telemetry.js +56 -0
- package/dist/cli/telemetry.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +1 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +93 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +30 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +116 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +25 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +19 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +67 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +94 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +135 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +71 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +63 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +39 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +86 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +83 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +101 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +45 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/extras-prompt.d.ts +7 -0
- package/dist/cli/tui/extras-prompt.d.ts.map +1 -0
- package/dist/cli/tui/extras-prompt.js +42 -0
- package/dist/cli/tui/extras-prompt.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +23 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +132 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +37 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +19 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +90 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +95 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +71 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +198 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +32 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +72 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +32 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +14 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +41 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +6 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +30 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +103 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +95 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +22 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +11 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +20 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +14 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +101 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +14 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +57 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +10 -2
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +226 -93
- package/dist/cli/warmup.js.map +1 -1
- package/dist/config.d.ts +28 -2
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +106 -56
- package/dist/config.js.map +1 -1
- package/dist/crawl/crawler.d.ts +6 -0
- package/dist/crawl/crawler.d.ts.map +1 -1
- package/dist/crawl/crawler.js +210 -209
- package/dist/crawl/crawler.js.map +1 -1
- package/dist/crawl/dedup.d.ts +1 -0
- package/dist/crawl/dedup.d.ts.map +1 -1
- package/dist/crawl/dedup.js +124 -81
- package/dist/crawl/dedup.js.map +1 -1
- package/dist/crawl/etag-incremental.d.ts +43 -0
- package/dist/crawl/etag-incremental.d.ts.map +1 -0
- package/dist/crawl/etag-incremental.js +94 -0
- package/dist/crawl/etag-incremental.js.map +1 -0
- package/dist/crawl/index-to-vec.d.ts +10 -0
- package/dist/crawl/index-to-vec.d.ts.map +1 -0
- package/dist/crawl/index-to-vec.js +44 -0
- package/dist/crawl/index-to-vec.js.map +1 -0
- package/dist/crawl/mapper.js +136 -164
- package/dist/crawl/mapper.js.map +1 -1
- package/dist/crawl/rate-limiter.js +63 -66
- package/dist/crawl/rate-limiter.js.map +1 -1
- package/dist/crawl/robots.js +58 -57
- package/dist/crawl/robots.js.map +1 -1
- package/dist/crawl/sitemap-first.d.ts +12 -0
- package/dist/crawl/sitemap-first.d.ts.map +1 -0
- package/dist/crawl/sitemap-first.js +47 -0
- package/dist/crawl/sitemap-first.js.map +1 -0
- package/dist/crawl/sitemap.js +33 -32
- package/dist/crawl/sitemap.js.map +1 -1
- package/dist/crawl/url-utils.d.ts +1 -0
- package/dist/crawl/url-utils.d.ts.map +1 -1
- package/dist/crawl/url-utils.js +49 -37
- package/dist/crawl/url-utils.js.map +1 -1
- package/dist/daemon/health-check.d.ts +16 -0
- package/dist/daemon/health-check.d.ts.map +1 -0
- package/dist/daemon/health-check.js +33 -0
- package/dist/daemon/health-check.js.map +1 -0
- package/dist/daemon/http-server.d.ts +26 -0
- package/dist/daemon/http-server.d.ts.map +1 -0
- package/dist/daemon/http-server.js +275 -0
- package/dist/daemon/http-server.js.map +1 -0
- package/dist/daemon/proxy.d.ts +10 -0
- package/dist/daemon/proxy.d.ts.map +1 -0
- package/dist/daemon/proxy.js +93 -0
- package/dist/daemon/proxy.js.map +1 -0
- package/dist/embedding/embed.d.ts +59 -0
- package/dist/embedding/embed.d.ts.map +1 -0
- package/dist/embedding/embed.js +233 -0
- package/dist/embedding/embed.js.map +1 -0
- package/dist/embedding/fastembed-provider.d.ts +19 -0
- package/dist/embedding/fastembed-provider.d.ts.map +1 -0
- package/dist/embedding/fastembed-provider.js +51 -0
- package/dist/embedding/fastembed-provider.js.map +1 -0
- package/dist/embedding/key-terms.d.ts +12 -0
- package/dist/embedding/key-terms.d.ts.map +1 -0
- package/dist/embedding/key-terms.js +234 -0
- package/dist/embedding/key-terms.js.map +1 -0
- package/dist/extraction/boilerplate.d.ts +15 -0
- package/dist/extraction/boilerplate.d.ts.map +1 -0
- package/dist/extraction/boilerplate.js +52 -0
- package/dist/extraction/boilerplate.js.map +1 -0
- package/dist/extraction/defuddle.d.ts.map +1 -1
- package/dist/extraction/defuddle.js +27 -23
- package/dist/extraction/defuddle.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +76 -76
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/jsonld.js +50 -54
- package/dist/extraction/jsonld.js.map +1 -1
- package/dist/extraction/lang-hints.d.ts +2 -0
- package/dist/extraction/lang-hints.d.ts.map +1 -0
- package/dist/extraction/lang-hints.js +30 -0
- package/dist/extraction/lang-hints.js.map +1 -0
- package/dist/extraction/llm-fallback.d.ts +17 -0
- package/dist/extraction/llm-fallback.d.ts.map +1 -0
- package/dist/extraction/llm-fallback.js +130 -0
- package/dist/extraction/llm-fallback.js.map +1 -0
- package/dist/extraction/markdown-sanitize.d.ts +2 -0
- package/dist/extraction/markdown-sanitize.d.ts.map +1 -0
- package/dist/extraction/markdown-sanitize.js +151 -0
- package/dist/extraction/markdown-sanitize.js.map +1 -0
- package/dist/extraction/markdown.d.ts +11 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +195 -91
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts +8 -0
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +57 -91
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +28 -29
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/schema.d.ts +12 -0
- package/dist/extraction/schema.d.ts.map +1 -1
- package/dist/extraction/schema.js +135 -72
- package/dist/extraction/schema.js.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.d.ts.map +1 -1
- package/dist/extraction/site-extractors/docs-generic.js +81 -91
- package/dist/extraction/site-extractors/docs-generic.js.map +1 -1
- package/dist/extraction/site-extractors/github.d.ts.map +1 -1
- package/dist/extraction/site-extractors/github.js +87 -95
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.d.ts.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +46 -54
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.d.ts.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +71 -80
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured-data.d.ts +4 -0
- package/dist/extraction/structured-data.d.ts.map +1 -0
- package/dist/extraction/structured-data.js +173 -0
- package/dist/extraction/structured-data.js.map +1 -0
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +163 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/extraction/v1/classifier.d.ts +3 -0
- package/dist/extraction/v1/classifier.d.ts.map +1 -0
- package/dist/extraction/v1/classifier.js +110 -0
- package/dist/extraction/v1/classifier.js.map +1 -0
- package/dist/extraction/v1/extract-provider.d.ts +16 -0
- package/dist/extraction/v1/extract-provider.d.ts.map +1 -0
- package/dist/extraction/v1/extract-provider.js +43 -0
- package/dist/extraction/v1/extract-provider.js.map +1 -0
- package/dist/extraction/v1/local-llm.d.ts +8 -0
- package/dist/extraction/v1/local-llm.d.ts.map +1 -0
- package/dist/extraction/v1/local-llm.js +58 -0
- package/dist/extraction/v1/local-llm.js.map +1 -0
- package/dist/extraction/v1/news.d.ts +3 -0
- package/dist/extraction/v1/news.d.ts.map +1 -0
- package/dist/extraction/v1/news.js +61 -0
- package/dist/extraction/v1/news.js.map +1 -0
- package/dist/extraction/v1/product.d.ts +3 -0
- package/dist/extraction/v1/product.d.ts.map +1 -0
- package/dist/extraction/v1/product.js +166 -0
- package/dist/extraction/v1/product.js.map +1 -0
- package/dist/extraction/v1/recipe.d.ts +3 -0
- package/dist/extraction/v1/recipe.d.ts.map +1 -0
- package/dist/extraction/v1/recipe.js +136 -0
- package/dist/extraction/v1/recipe.js.map +1 -0
- package/dist/extraction/v1/routed.d.ts +17 -0
- package/dist/extraction/v1/routed.d.ts.map +1 -0
- package/dist/extraction/v1/routed.js +68 -0
- package/dist/extraction/v1/routed.js.map +1 -0
- package/dist/extraction/v1/schemas/Article.d.ts +11 -0
- package/dist/extraction/v1/schemas/Article.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Article.js +23 -0
- package/dist/extraction/v1/schemas/Article.js.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts +9 -0
- package/dist/extraction/v1/schemas/CodeSnippet.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js +90 -0
- package/dist/extraction/v1/schemas/CodeSnippet.js.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts +10 -0
- package/dist/extraction/v1/schemas/EventListing.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/EventListing.js +122 -0
- package/dist/extraction/v1/schemas/EventListing.js.map +1 -0
- package/dist/extraction/v1/schemas/Paper.d.ts +10 -0
- package/dist/extraction/v1/schemas/Paper.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Paper.js +156 -0
- package/dist/extraction/v1/schemas/Paper.js.map +1 -0
- package/dist/extraction/v1/schemas/Product.d.ts +17 -0
- package/dist/extraction/v1/schemas/Product.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Product.js +149 -0
- package/dist/extraction/v1/schemas/Product.js.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts +14 -0
- package/dist/extraction/v1/schemas/Recipe.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/Recipe.js +160 -0
- package/dist/extraction/v1/schemas/Recipe.js.map +1 -0
- package/dist/extraction/v1/schemas/index.d.ts +13 -0
- package/dist/extraction/v1/schemas/index.d.ts.map +1 -0
- package/dist/extraction/v1/schemas/index.js +44 -0
- package/dist/extraction/v1/schemas/index.js.map +1 -0
- package/dist/extraction/v1/site-extractors.d.ts +5 -0
- package/dist/extraction/v1/site-extractors.d.ts.map +1 -0
- package/dist/extraction/v1/site-extractors.js +31 -0
- package/dist/extraction/v1/site-extractors.js.map +1 -0
- package/dist/fetch/action-executor.d.ts +28 -0
- package/dist/fetch/action-executor.d.ts.map +1 -0
- package/dist/fetch/action-executor.js +88 -0
- package/dist/fetch/action-executor.js.map +1 -0
- package/dist/fetch/auth.d.ts +2 -1
- package/dist/fetch/auth.d.ts.map +1 -1
- package/dist/fetch/auth.js +56 -26
- package/dist/fetch/auth.js.map +1 -1
- package/dist/fetch/browser-pool.d.ts +30 -11
- package/dist/fetch/browser-pool.d.ts.map +1 -1
- package/dist/fetch/browser-pool.js +303 -127
- package/dist/fetch/browser-pool.js.map +1 -1
- package/dist/fetch/browser-selector.d.ts +17 -0
- package/dist/fetch/browser-selector.d.ts.map +1 -0
- package/dist/fetch/browser-selector.js +72 -0
- package/dist/fetch/browser-selector.js.map +1 -0
- package/dist/fetch/browser-types.d.ts +3 -0
- package/dist/fetch/browser-types.d.ts.map +1 -0
- package/dist/fetch/browser-types.js +45 -0
- package/dist/fetch/browser-types.js.map +1 -0
- package/dist/fetch/cdp-client.d.ts +9 -0
- package/dist/fetch/cdp-client.d.ts.map +1 -0
- package/dist/fetch/cdp-client.js +89 -0
- package/dist/fetch/cdp-client.js.map +1 -0
- package/dist/fetch/content-check.js +39 -46
- package/dist/fetch/content-check.js.map +1 -1
- package/dist/fetch/http-client.d.ts +4 -0
- package/dist/fetch/http-client.d.ts.map +1 -1
- package/dist/fetch/http-client.js +147 -128
- package/dist/fetch/http-client.js.map +1 -1
- package/dist/fetch/lightpanda.d.ts +28 -0
- package/dist/fetch/lightpanda.d.ts.map +1 -0
- package/dist/fetch/lightpanda.js +174 -0
- package/dist/fetch/lightpanda.js.map +1 -0
- package/dist/fetch/playwright-tier.d.ts +19 -0
- package/dist/fetch/playwright-tier.d.ts.map +1 -0
- package/dist/fetch/playwright-tier.js +76 -0
- package/dist/fetch/playwright-tier.js.map +1 -0
- package/dist/fetch/router.d.ts +49 -3
- package/dist/fetch/router.d.ts.map +1 -1
- package/dist/fetch/router.js +185 -81
- package/dist/fetch/router.js.map +1 -1
- package/dist/index.js +97 -17
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +31 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +245 -0
- package/dist/instructions.js.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts +3 -0
- package/dist/integrations/cloud/llm/anthropic.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/anthropic.js +41 -0
- package/dist/integrations/cloud/llm/anthropic.js.map +1 -0
- package/dist/integrations/cloud/llm/cache.d.ts +5 -0
- package/dist/integrations/cloud/llm/cache.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/cache.js +49 -0
- package/dist/integrations/cloud/llm/cache.js.map +1 -0
- package/dist/integrations/cloud/llm/gemini.d.ts +3 -0
- package/dist/integrations/cloud/llm/gemini.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/gemini.js +37 -0
- package/dist/integrations/cloud/llm/gemini.js.map +1 -0
- package/dist/integrations/cloud/llm/groq.d.ts +3 -0
- package/dist/integrations/cloud/llm/groq.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/groq.js +74 -0
- package/dist/integrations/cloud/llm/groq.js.map +1 -0
- package/dist/integrations/cloud/llm/hash.d.ts +3 -0
- package/dist/integrations/cloud/llm/hash.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/hash.js +26 -0
- package/dist/integrations/cloud/llm/hash.js.map +1 -0
- package/dist/integrations/cloud/llm/openai.d.ts +3 -0
- package/dist/integrations/cloud/llm/openai.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/openai.js +43 -0
- package/dist/integrations/cloud/llm/openai.js.map +1 -0
- package/dist/integrations/cloud/llm/select.d.ts +5 -0
- package/dist/integrations/cloud/llm/select.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/select.js +30 -0
- package/dist/integrations/cloud/llm/select.js.map +1 -0
- package/dist/integrations/cloud/llm/types.d.ts +24 -0
- package/dist/integrations/cloud/llm/types.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/types.js +1 -0
- package/dist/integrations/cloud/llm/types.js.map +1 -0
- package/dist/integrations/cloud/llm/validate.d.ts +6 -0
- package/dist/integrations/cloud/llm/validate.d.ts.map +1 -0
- package/dist/integrations/cloud/llm/validate.js +63 -0
- package/dist/integrations/cloud/llm/validate.js.map +1 -0
- package/dist/logger.d.ts +4 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +71 -30
- package/dist/logger.js.map +1 -1
- package/dist/pdf-parse.d.js +1 -0
- package/dist/pdf-parse.d.js.map +1 -0
- package/dist/plugins/loader.d.ts +20 -0
- package/dist/plugins/loader.d.ts.map +1 -0
- package/dist/plugins/loader.js +157 -0
- package/dist/plugins/loader.js.map +1 -0
- package/dist/plugins/registry.d.ts +26 -0
- package/dist/plugins/registry.d.ts.map +1 -0
- package/dist/plugins/registry.js +71 -0
- package/dist/plugins/registry.js.map +1 -0
- package/dist/plugins/validate.d.ts +9 -0
- package/dist/plugins/validate.d.ts.map +1 -0
- package/dist/plugins/validate.js +79 -0
- package/dist/plugins/validate.js.map +1 -0
- package/dist/providers/embed-provider.d.ts +11 -0
- package/dist/providers/embed-provider.d.ts.map +1 -0
- package/dist/providers/embed-provider.js +24 -0
- package/dist/providers/embed-provider.js.map +1 -0
- package/dist/providers/extract-provider.d.ts +23 -0
- package/dist/providers/extract-provider.d.ts.map +1 -0
- package/dist/providers/extract-provider.js +25 -0
- package/dist/providers/extract-provider.js.map +1 -0
- package/dist/providers/rerank-provider.d.ts +16 -0
- package/dist/providers/rerank-provider.d.ts.map +1 -0
- package/dist/providers/rerank-provider.js +28 -0
- package/dist/providers/rerank-provider.js.map +1 -0
- package/dist/providers/search-provider.d.ts +25 -0
- package/dist/providers/search-provider.d.ts.map +1 -0
- package/dist/providers/search-provider.js +44 -0
- package/dist/providers/search-provider.js.map +1 -0
- package/dist/providers/vector-store.d.ts +27 -0
- package/dist/providers/vector-store.d.ts.map +1 -0
- package/dist/providers/vector-store.js +27 -0
- package/dist/providers/vector-store.js.map +1 -0
- package/dist/python-env.d.ts +9 -0
- package/dist/python-env.d.ts.map +1 -0
- package/dist/python-env.js +13 -0
- package/dist/python-env.js.map +1 -0
- package/dist/repl/commands/agent.d.ts +5 -0
- package/dist/repl/commands/agent.d.ts.map +1 -0
- package/dist/repl/commands/agent.js +62 -0
- package/dist/repl/commands/agent.js.map +1 -0
- package/dist/repl/commands/cache.d.ts +4 -0
- package/dist/repl/commands/cache.d.ts.map +1 -0
- package/dist/repl/commands/cache.js +43 -0
- package/dist/repl/commands/cache.js.map +1 -0
- package/dist/repl/commands/crawl.d.ts +7 -0
- package/dist/repl/commands/crawl.d.ts.map +1 -0
- package/dist/repl/commands/crawl.js +44 -0
- package/dist/repl/commands/crawl.js.map +1 -0
- package/dist/repl/commands/extract.d.ts +5 -0
- package/dist/repl/commands/extract.d.ts.map +1 -0
- package/dist/repl/commands/extract.js +47 -0
- package/dist/repl/commands/extract.js.map +1 -0
- package/dist/repl/commands/fetch.d.ts +5 -0
- package/dist/repl/commands/fetch.d.ts.map +1 -0
- package/dist/repl/commands/fetch.js +67 -0
- package/dist/repl/commands/fetch.js.map +1 -0
- package/dist/repl/commands/find-similar.d.ts +5 -0
- package/dist/repl/commands/find-similar.d.ts.map +1 -0
- package/dist/repl/commands/find-similar.js +74 -0
- package/dist/repl/commands/find-similar.js.map +1 -0
- package/dist/repl/commands/research.d.ts +5 -0
- package/dist/repl/commands/research.d.ts.map +1 -0
- package/dist/repl/commands/research.js +65 -0
- package/dist/repl/commands/research.js.map +1 -0
- package/dist/repl/commands/search.d.ts +5 -0
- package/dist/repl/commands/search.d.ts.map +1 -0
- package/dist/repl/commands/search.js +74 -0
- package/dist/repl/commands/search.js.map +1 -0
- package/dist/repl/commands/types.d.ts +9 -0
- package/dist/repl/commands/types.d.ts.map +1 -0
- package/dist/repl/commands/types.js +1 -0
- package/dist/repl/commands/types.js.map +1 -0
- package/dist/repl/formatters.d.ts +13 -0
- package/dist/repl/formatters.d.ts.map +1 -0
- package/dist/repl/formatters.js +283 -0
- package/dist/repl/formatters.js.map +1 -0
- package/dist/repl/parser.d.ts +9 -0
- package/dist/repl/parser.d.ts.map +1 -0
- package/dist/repl/parser.js +86 -0
- package/dist/repl/parser.js.map +1 -0
- package/dist/repl/shell.d.ts +8 -0
- package/dist/repl/shell.d.ts.map +1 -0
- package/dist/repl/shell.js +184 -0
- package/dist/repl/shell.js.map +1 -0
- package/dist/research/branch-exploration.d.ts +14 -0
- package/dist/research/branch-exploration.d.ts.map +1 -0
- package/dist/research/branch-exploration.js +100 -0
- package/dist/research/branch-exploration.js.map +1 -0
- package/dist/research/brief.d.ts +5 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +242 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/citation-graph.d.ts +9 -0
- package/dist/research/citation-graph.d.ts.map +1 -0
- package/dist/research/citation-graph.js +114 -0
- package/dist/research/citation-graph.js.map +1 -0
- package/dist/research/decompose.d.ts +14 -0
- package/dist/research/decompose.d.ts.map +1 -0
- package/dist/research/decompose.js +439 -0
- package/dist/research/decompose.js.map +1 -0
- package/dist/research/pipeline.d.ts +5 -0
- package/dist/research/pipeline.d.ts.map +1 -0
- package/dist/research/pipeline.js +269 -0
- package/dist/research/pipeline.js.map +1 -0
- package/dist/research/synthesis-local.d.ts +16 -0
- package/dist/research/synthesis-local.d.ts.map +1 -0
- package/dist/research/synthesis-local.js +73 -0
- package/dist/research/synthesis-local.js.map +1 -0
- package/dist/research/synthesize.d.ts +10 -0
- package/dist/research/synthesize.d.ts.map +1 -0
- package/dist/research/synthesize.js +137 -0
- package/dist/research/synthesize.js.map +1 -0
- package/dist/search/answer-synthesis.d.ts +33 -0
- package/dist/search/answer-synthesis.d.ts.map +1 -0
- package/dist/search/answer-synthesis.js +244 -0
- package/dist/search/answer-synthesis.js.map +1 -0
- package/dist/search/context-formatter.d.ts +3 -0
- package/dist/search/context-formatter.d.ts.map +1 -0
- package/dist/search/context-formatter.js +56 -0
- package/dist/search/context-formatter.js.map +1 -0
- package/dist/search/dedup.d.ts +1 -0
- package/dist/search/dedup.d.ts.map +1 -1
- package/dist/search/dedup.js +40 -32
- package/dist/search/dedup.js.map +1 -1
- package/dist/search/engines/arxiv.d.ts +7 -0
- package/dist/search/engines/arxiv.d.ts.map +1 -0
- package/dist/search/engines/arxiv.js +70 -0
- package/dist/search/engines/arxiv.js.map +1 -0
- package/dist/search/engines/bing-news.d.ts +7 -0
- package/dist/search/engines/bing-news.d.ts.map +1 -0
- package/dist/search/engines/bing-news.js +97 -0
- package/dist/search/engines/bing-news.js.map +1 -0
- package/dist/search/engines/bing.d.ts +1 -0
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +100 -44
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/devdocs.d.ts +6 -0
- package/dist/search/engines/devdocs.d.ts.map +1 -0
- package/dist/search/engines/devdocs.js +56 -0
- package/dist/search/engines/devdocs.js.map +1 -0
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +56 -44
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/github-code.d.ts +7 -0
- package/dist/search/engines/github-code.d.ts.map +1 -0
- package/dist/search/engines/github-code.js +55 -0
- package/dist/search/engines/github-code.js.map +1 -0
- package/dist/search/engines/hn-algolia.d.ts +7 -0
- package/dist/search/engines/hn-algolia.d.ts.map +1 -0
- package/dist/search/engines/hn-algolia.js +76 -0
- package/dist/search/engines/hn-algolia.js.map +1 -0
- package/dist/search/engines/lobsters.d.ts +7 -0
- package/dist/search/engines/lobsters.d.ts.map +1 -0
- package/dist/search/engines/lobsters.js +83 -0
- package/dist/search/engines/lobsters.js.map +1 -0
- package/dist/search/engines/mdn.d.ts +7 -0
- package/dist/search/engines/mdn.d.ts.map +1 -0
- package/dist/search/engines/mdn.js +48 -0
- package/dist/search/engines/mdn.js.map +1 -0
- package/dist/search/engines/semantic-scholar.d.ts +7 -0
- package/dist/search/engines/semantic-scholar.d.ts.map +1 -0
- package/dist/search/engines/semantic-scholar.js +69 -0
- package/dist/search/engines/semantic-scholar.js.map +1 -0
- package/dist/search/engines/stackoverflow.d.ts +7 -0
- package/dist/search/engines/stackoverflow.d.ts.map +1 -0
- package/dist/search/engines/stackoverflow.js +73 -0
- package/dist/search/engines/stackoverflow.js.map +1 -0
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +65 -46
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/evidence.d.ts +25 -0
- package/dist/search/evidence.d.ts.map +1 -0
- package/dist/search/evidence.js +220 -0
- package/dist/search/evidence.js.map +1 -0
- package/dist/search/filters.js +49 -55
- package/dist/search/filters.js.map +1 -1
- package/dist/search/find-similar/crawl-rank.d.ts +9 -0
- package/dist/search/find-similar/crawl-rank.d.ts.map +1 -0
- package/dist/search/find-similar/crawl-rank.js +272 -0
- package/dist/search/find-similar/crawl-rank.js.map +1 -0
- package/dist/search/find-similar/mode.d.ts +4 -0
- package/dist/search/find-similar/mode.d.ts.map +1 -0
- package/dist/search/find-similar/mode.js +12 -0
- package/dist/search/find-similar/mode.js.map +1 -0
- package/dist/search/find-similar.d.ts +5 -0
- package/dist/search/find-similar.d.ts.map +1 -0
- package/dist/search/find-similar.js +509 -0
- package/dist/search/find-similar.js.map +1 -0
- package/dist/search/highlights.d.ts +19 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +167 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/search/language-filter.d.ts +29 -0
- package/dist/search/language-filter.d.ts.map +1 -0
- package/dist/search/language-filter.js +126 -0
- package/dist/search/language-filter.js.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts +4 -0
- package/dist/search/legacy/searxng-orchestrator.d.ts.map +1 -0
- package/dist/search/legacy/searxng-orchestrator.js +501 -0
- package/dist/search/legacy/searxng-orchestrator.js.map +1 -0
- package/dist/search/legacy/searxng-provider.d.ts +7 -0
- package/dist/search/legacy/searxng-provider.d.ts.map +1 -0
- package/dist/search/legacy/searxng-provider.js +11 -0
- package/dist/search/legacy/searxng-provider.js.map +1 -0
- package/dist/search/multi-query.d.ts +25 -0
- package/dist/search/multi-query.d.ts.map +1 -0
- package/dist/search/multi-query.js +228 -0
- package/dist/search/multi-query.js.map +1 -0
- package/dist/search/query.js +32 -34
- package/dist/search/query.js.map +1 -1
- package/dist/search/rerank.d.ts +3 -1
- package/dist/search/rerank.d.ts.map +1 -1
- package/dist/search/rerank.js +44 -35
- package/dist/search/rerank.js.map +1 -1
- package/dist/search/reranker/authority-boost.d.ts +3 -0
- package/dist/search/reranker/authority-boost.d.ts.map +1 -0
- package/dist/search/reranker/authority-boost.js +179 -0
- package/dist/search/reranker/authority-boost.js.map +1 -0
- package/dist/search/reranker/consensus-boost.d.ts +3 -0
- package/dist/search/reranker/consensus-boost.d.ts.map +1 -0
- package/dist/search/reranker/consensus-boost.js +27 -0
- package/dist/search/reranker/consensus-boost.js.map +1 -0
- package/dist/search/reranker/recency-boost.d.ts +3 -0
- package/dist/search/reranker/recency-boost.d.ts.map +1 -0
- package/dist/search/reranker/recency-boost.js +13 -0
- package/dist/search/reranker/recency-boost.js.map +1 -0
- package/dist/search/reranker/recency.d.ts +3 -0
- package/dist/search/reranker/recency.d.ts.map +1 -0
- package/dist/search/reranker/recency.js +23 -0
- package/dist/search/reranker/recency.js.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts +12 -0
- package/dist/search/reranker/transformers-rerank-provider.d.ts.map +1 -0
- package/dist/search/reranker/transformers-rerank-provider.js +78 -0
- package/dist/search/reranker/transformers-rerank-provider.js.map +1 -0
- package/dist/search/rrf.d.ts +17 -0
- package/dist/search/rrf.d.ts.map +1 -0
- package/dist/search/rrf.js +39 -0
- package/dist/search/rrf.js.map +1 -0
- package/dist/search/sampling.d.ts +25 -0
- package/dist/search/sampling.d.ts.map +1 -0
- package/dist/search/sampling.js +52 -0
- package/dist/search/sampling.js.map +1 -0
- package/dist/search/searxng.d.ts.map +1 -1
- package/dist/search/searxng.js +69 -79
- package/dist/search/searxng.js.map +1 -1
- package/dist/search/tokens.d.ts +3 -0
- package/dist/search/tokens.d.ts.map +1 -0
- package/dist/search/tokens.js +39 -0
- package/dist/search/tokens.js.map +1 -0
- package/dist/search/truncate.d.ts +6 -0
- package/dist/search/truncate.d.ts.map +1 -0
- package/dist/search/truncate.js +26 -0
- package/dist/search/truncate.js.map +1 -0
- package/dist/search/url-unwrap.d.ts +3 -0
- package/dist/search/url-unwrap.d.ts.map +1 -0
- package/dist/search/url-unwrap.js +43 -0
- package/dist/search/url-unwrap.js.map +1 -0
- package/dist/search/v1/context-rank.d.ts +13 -0
- package/dist/search/v1/context-rank.d.ts.map +1 -0
- package/dist/search/v1/context-rank.js +74 -0
- package/dist/search/v1/context-rank.js.map +1 -0
- package/dist/search/v1/engine-base.d.ts +27 -0
- package/dist/search/v1/engine-base.d.ts.map +1 -0
- package/dist/search/v1/engine-base.js +110 -0
- package/dist/search/v1/engine-base.js.map +1 -0
- package/dist/search/v1/intent-router.d.ts +22 -0
- package/dist/search/v1/intent-router.d.ts.map +1 -0
- package/dist/search/v1/intent-router.js +138 -0
- package/dist/search/v1/intent-router.js.map +1 -0
- package/dist/search/v1/orchestrator.d.ts +24 -0
- package/dist/search/v1/orchestrator.d.ts.map +1 -0
- package/dist/search/v1/orchestrator.js +163 -0
- package/dist/search/v1/orchestrator.js.map +1 -0
- package/dist/search/v1/recency-boost.d.ts +9 -0
- package/dist/search/v1/recency-boost.d.ts.map +1 -0
- package/dist/search/v1/recency-boost.js +37 -0
- package/dist/search/v1/recency-boost.js.map +1 -0
- package/dist/search/v1/recent-cache-dedup.d.ts +6 -0
- package/dist/search/v1/recent-cache-dedup.d.ts.map +1 -0
- package/dist/search/v1/recent-cache-dedup.js +85 -0
- package/dist/search/v1/recent-cache-dedup.js.map +1 -0
- package/dist/search/v1/rss/feed-config.d.ts +21 -0
- package/dist/search/v1/rss/feed-config.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-config.js +90 -0
- package/dist/search/v1/rss/feed-config.js.map +1 -0
- package/dist/search/v1/rss/feed-parser.d.ts +14 -0
- package/dist/search/v1/rss/feed-parser.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-parser.js +104 -0
- package/dist/search/v1/rss/feed-parser.js.map +1 -0
- package/dist/search/v1/rss/feed-poller.d.ts +22 -0
- package/dist/search/v1/rss/feed-poller.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-poller.js +102 -0
- package/dist/search/v1/rss/feed-poller.js.map +1 -0
- package/dist/search/v1/rss/feed-store.d.ts +30 -0
- package/dist/search/v1/rss/feed-store.d.ts.map +1 -0
- package/dist/search/v1/rss/feed-store.js +134 -0
- package/dist/search/v1/rss/feed-store.js.map +1 -0
- package/dist/search/v1/rss/rss-engine.d.ts +6 -0
- package/dist/search/v1/rss/rss-engine.d.ts.map +1 -0
- package/dist/search/v1/rss/rss-engine.js +28 -0
- package/dist/search/v1/rss/rss-engine.js.map +1 -0
- package/dist/search/v1/v1-provider.d.ts +7 -0
- package/dist/search/v1/v1-provider.d.ts.map +1 -0
- package/dist/search/v1/v1-provider.js +68 -0
- package/dist/search/v1/v1-provider.js.map +1 -0
- package/dist/search/v1/verticals/code.d.ts +4 -0
- package/dist/search/v1/verticals/code.d.ts.map +1 -0
- package/dist/search/v1/verticals/code.js +20 -0
- package/dist/search/v1/verticals/code.js.map +1 -0
- package/dist/search/v1/verticals/docs.d.ts +4 -0
- package/dist/search/v1/verticals/docs.d.ts.map +1 -0
- package/dist/search/v1/verticals/docs.js +20 -0
- package/dist/search/v1/verticals/docs.js.map +1 -0
- package/dist/search/v1/verticals/general.d.ts +4 -0
- package/dist/search/v1/verticals/general.d.ts.map +1 -0
- package/dist/search/v1/verticals/general.js +22 -0
- package/dist/search/v1/verticals/general.js.map +1 -0
- package/dist/search/v1/verticals/news.d.ts +10 -0
- package/dist/search/v1/verticals/news.d.ts.map +1 -0
- package/dist/search/v1/verticals/news.js +52 -0
- package/dist/search/v1/verticals/news.js.map +1 -0
- package/dist/search/v1/verticals/papers.d.ts +4 -0
- package/dist/search/v1/verticals/papers.d.ts.map +1 -0
- package/dist/search/v1/verticals/papers.js +23 -0
- package/dist/search/v1/verticals/papers.js.map +1 -0
- package/dist/search/validator.js +31 -31
- package/dist/search/validator.js.map +1 -1
- package/dist/searxng/bootstrap.d.ts +30 -0
- package/dist/searxng/bootstrap.d.ts.map +1 -1
- package/dist/searxng/bootstrap.js +223 -85
- package/dist/searxng/bootstrap.js.map +1 -1
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +69 -60
- package/dist/searxng/docker.js.map +1 -1
- package/dist/searxng/process.d.ts +13 -1
- package/dist/searxng/process.d.ts.map +1 -1
- package/dist/searxng/process.js +231 -164
- package/dist/searxng/process.js.map +1 -1
- package/dist/server/backend-status.d.ts +13 -0
- package/dist/server/backend-status.d.ts.map +1 -0
- package/dist/server/backend-status.js +40 -0
- package/dist/server/backend-status.js.map +1 -0
- package/dist/server/tool-schemas.d.ts +549 -0
- package/dist/server/tool-schemas.d.ts.map +1 -0
- package/dist/server/tool-schemas.js +464 -0
- package/dist/server/tool-schemas.js.map +1 -0
- package/dist/server/warmup-on-start.d.ts +9 -0
- package/dist/server/warmup-on-start.d.ts.map +1 -0
- package/dist/server/warmup-on-start.js +55 -0
- package/dist/server/warmup-on-start.js.map +1 -0
- package/dist/server.d.ts +17 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +454 -297
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +5 -0
- package/dist/tools/agent.d.ts.map +1 -0
- package/dist/tools/agent.js +128 -0
- package/dist/tools/agent.js.map +1 -0
- package/dist/tools/cache.d.ts +2 -1
- package/dist/tools/cache.d.ts.map +1 -1
- package/dist/tools/cache.js +175 -44
- package/dist/tools/cache.js.map +1 -1
- package/dist/tools/crawl.d.ts.map +1 -1
- package/dist/tools/crawl.js +171 -88
- package/dist/tools/crawl.js.map +1 -1
- package/dist/tools/extract.d.ts +2 -2
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +175 -59
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -2
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +161 -68
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/find-similar.d.ts +5 -0
- package/dist/tools/find-similar.d.ts.map +1 -0
- package/dist/tools/find-similar.js +127 -0
- package/dist/tools/find-similar.js.map +1 -0
- package/dist/tools/research.d.ts +5 -0
- package/dist/tools/research.d.ts.map +1 -0
- package/dist/tools/research.js +107 -0
- package/dist/tools/research.js.map +1 -0
- package/dist/tools/search.d.ts +10 -2
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +13 -158
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +350 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +6 -1
- package/dist/types.js.map +1 -1
- package/dist/util/mode.d.ts +4 -0
- package/dist/util/mode.d.ts.map +1 -0
- package/dist/util/mode.js +34 -0
- package/dist/util/mode.js.map +1 -0
- package/package.json +78 -8
- package/dist/extraction/trafilatura.d.ts +0 -6
- package/dist/extraction/trafilatura.d.ts.map +0 -1
- package/dist/extraction/trafilatura.js +0 -105
- package/dist/extraction/trafilatura.js.map +0 -1
- package/dist/search/flashrank.d.ts +0 -12
- package/dist/search/flashrank.d.ts.map +0 -1
- package/dist/search/flashrank.js +0 -63
- package/dist/search/flashrank.js.map +0 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/fetch/browser-types.ts"],"sourcesContent":["import type { BrowserType } from '../types.js';\n\nconst VALID_BROWSER_TYPES: ReadonlySet<string> = new Set(['chromium', 'firefox', 'webkit', 'lightpanda']);\n\nconst DEFAULT_BROWSER_TYPES: BrowserType[] = ['chromium'];\n\nfunction warn(msg: string, data: Record<string, unknown>): void {\n const line = JSON.stringify({ ts: new Date().toISOString(), level: 'warn', msg, module: 'fetch', data });\n process.stderr.write(line + '\\n');\n}\n\nexport function parseBrowserTypes(input: string | undefined | null): BrowserType[] {\n if (!input || typeof input !== 'string') {\n return [...DEFAULT_BROWSER_TYPES];\n }\n\n const trimmed = input.trim();\n if (trimmed.length === 0) {\n return [...DEFAULT_BROWSER_TYPES];\n }\n\n const parts = trimmed\n .split(',')\n .map(s => s.trim())\n .filter(s => s.length > 0);\n\n if (parts.length === 0) {\n return [...DEFAULT_BROWSER_TYPES];\n }\n\n const seen = new Set<string>();\n const valid: BrowserType[] = [];\n const invalid: string[] = [];\n\n for (const part of parts) {\n if (!VALID_BROWSER_TYPES.has(part)) {\n invalid.push(part);\n continue;\n }\n if (seen.has(part)) {\n continue;\n }\n seen.add(part);\n valid.push(part as BrowserType);\n }\n\n if (invalid.length > 0) {\n warn('ignored invalid browser types', { invalid, valid: [...valid] });\n }\n\n if (valid.length === 0) {\n warn('no valid browser types found, falling back to chromium', { input });\n return [...DEFAULT_BROWSER_TYPES];\n }\n\n return valid;\n}\n"],"mappings":"AAEA,MAAM,sBAA2C,oBAAI,IAAI,CAAC,YAAY,WAAW,UAAU,YAAY,CAAC;AAExG,MAAM,wBAAuC,CAAC,UAAU;AAExD,SAAS,KAAK,KAAa,MAAqC;AAC9D,QAAM,OAAO,KAAK,UAAU,EAAE,KAAI,oBAAI,KAAK,GAAE,YAAY,GAAG,OAAO,QAAQ,KAAK,QAAQ,SAAS,KAAK,CAAC;AACvG,UAAQ,OAAO,MAAM,OAAO,IAAI;AAClC;AAEO,SAAS,kBAAkB,OAAiD;AACjF,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO,CAAC,GAAG,qBAAqB;AAAA,EAClC;AAEA,QAAM,UAAU,MAAM,KAAK;AAC3B,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO,CAAC,GAAG,qBAAqB;AAAA,EAClC;AAEA,QAAM,QAAQ,QACX,MAAM,GAAG,EACT,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAE3B,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO,CAAC,GAAG,qBAAqB;AAAA,EAClC;AAEA,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,QAAuB,CAAC;AAC9B,QAAM,UAAoB,CAAC;AAE3B,aAAW,QAAQ,OAAO;AACxB,QAAI,CAAC,oBAAoB,IAAI,IAAI,GAAG;AAClC,cAAQ,KAAK,IAAI;AACjB;AAAA,IACF;AACA,QAAI,KAAK,IAAI,IAAI,GAAG;AAClB;AAAA,IACF;AACA,SAAK,IAAI,IAAI;AACb,UAAM,KAAK,IAAmB;AAAA,EAChC;AAEA,MAAI,QAAQ,SAAS,GAAG;AACtB,SAAK,iCAAiC,EAAE,SAAS,OAAO,CAAC,GAAG,KAAK,EAAE,CAAC;AAAA,EACtE;AAEA,MAAI,MAAM,WAAW,GAAG;AACtB,SAAK,0DAA0D,EAAE,MAAM,CAAC;AACxE,WAAO,CAAC,GAAG,qBAAqB;AAAA,EAClC;AAEA,SAAO;AACT;","names":[]}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { CDPSession } from '../types.js';
|
|
2
|
+
export interface DiscoverOptions {
|
|
3
|
+
timeoutMs?: number;
|
|
4
|
+
filterPages?: boolean;
|
|
5
|
+
}
|
|
6
|
+
export declare function parseCDPResponse(raw: string): CDPSession[];
|
|
7
|
+
export declare function discoverSessions(cdpUrl: string, options?: DiscoverOptions): Promise<CDPSession[]>;
|
|
8
|
+
export declare function isCDPReachable(cdpUrl: string, timeoutMs?: number): Promise<boolean>;
|
|
9
|
+
//# sourceMappingURL=cdp-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cdp-client.d.ts","sourceRoot":"","sources":["../../src/fetch/cdp-client.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAM9C,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAiCD,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,EAAE,CA2B1D;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,UAAU,EAAE,CAAC,CA6BvB;AAED,wBAAsB,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,SAAqB,GAAG,OAAO,CAAC,OAAO,CAAC,CAQrG"}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import * as http from "node:http";
|
|
2
|
+
import { createLogger } from "../logger.js";
|
|
3
|
+
const log = createLogger("fetch");
|
|
4
|
+
const DEFAULT_TIMEOUT_MS = 3e3;
|
|
5
|
+
function httpGet(url, timeoutMs) {
|
|
6
|
+
return new Promise((resolve, reject) => {
|
|
7
|
+
const req = http.get(url, { timeout: timeoutMs }, (res) => {
|
|
8
|
+
let body = "";
|
|
9
|
+
res.on("data", (chunk) => {
|
|
10
|
+
body += chunk;
|
|
11
|
+
});
|
|
12
|
+
res.on("end", () => {
|
|
13
|
+
resolve({ statusCode: res.statusCode ?? 0, body });
|
|
14
|
+
});
|
|
15
|
+
});
|
|
16
|
+
req.on("error", (err) => {
|
|
17
|
+
reject(err);
|
|
18
|
+
});
|
|
19
|
+
req.on("timeout", () => {
|
|
20
|
+
req.destroy();
|
|
21
|
+
reject(new Error(`CDP request timed out after ${timeoutMs}ms`));
|
|
22
|
+
});
|
|
23
|
+
});
|
|
24
|
+
}
|
|
25
|
+
function parseCDPResponse(raw) {
|
|
26
|
+
try {
|
|
27
|
+
const parsed = JSON.parse(raw);
|
|
28
|
+
if (!Array.isArray(parsed)) {
|
|
29
|
+
log.debug("CDP response is not an array");
|
|
30
|
+
return [];
|
|
31
|
+
}
|
|
32
|
+
return parsed.filter((entry) => {
|
|
33
|
+
if (!entry || typeof entry !== "object") return false;
|
|
34
|
+
if (!entry.id || typeof entry.id !== "string") return false;
|
|
35
|
+
if (!entry.webSocketDebuggerUrl || typeof entry.webSocketDebuggerUrl !== "string") return false;
|
|
36
|
+
return true;
|
|
37
|
+
}).map((entry) => ({
|
|
38
|
+
id: entry.id,
|
|
39
|
+
url: entry.url ?? "",
|
|
40
|
+
title: entry.title ?? "",
|
|
41
|
+
webSocketDebuggerUrl: entry.webSocketDebuggerUrl,
|
|
42
|
+
type: entry.type,
|
|
43
|
+
devtoolsFrontendUrl: entry.devtoolsFrontendUrl
|
|
44
|
+
}));
|
|
45
|
+
} catch (err) {
|
|
46
|
+
log.debug("failed to parse CDP response", { error: err instanceof Error ? err.message : String(err) });
|
|
47
|
+
return [];
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
async function discoverSessions(cdpUrl, options) {
|
|
51
|
+
const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
52
|
+
const filterPages = options?.filterPages ?? false;
|
|
53
|
+
const jsonUrl = cdpUrl.endsWith("/") ? `${cdpUrl}json` : `${cdpUrl}/json`;
|
|
54
|
+
try {
|
|
55
|
+
log.debug("discovering CDP sessions", { url: jsonUrl, timeoutMs });
|
|
56
|
+
const { statusCode, body } = await httpGet(jsonUrl, timeoutMs);
|
|
57
|
+
if (statusCode !== 200) {
|
|
58
|
+
log.warn("CDP endpoint returned non-200", { statusCode, url: jsonUrl });
|
|
59
|
+
return [];
|
|
60
|
+
}
|
|
61
|
+
let sessions = parseCDPResponse(body);
|
|
62
|
+
if (filterPages) {
|
|
63
|
+
sessions = sessions.filter((s) => s.type === "page" || s.type === void 0);
|
|
64
|
+
}
|
|
65
|
+
log.info("CDP sessions discovered", { count: sessions.length, url: jsonUrl });
|
|
66
|
+
return sessions;
|
|
67
|
+
} catch (err) {
|
|
68
|
+
log.debug("CDP discovery failed", {
|
|
69
|
+
url: jsonUrl,
|
|
70
|
+
error: err instanceof Error ? err.message : String(err)
|
|
71
|
+
});
|
|
72
|
+
return [];
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
async function isCDPReachable(cdpUrl, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
76
|
+
const jsonUrl = cdpUrl.endsWith("/") ? `${cdpUrl}json` : `${cdpUrl}/json`;
|
|
77
|
+
try {
|
|
78
|
+
const { statusCode } = await httpGet(jsonUrl, timeoutMs);
|
|
79
|
+
return statusCode === 200;
|
|
80
|
+
} catch {
|
|
81
|
+
return false;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
export {
|
|
85
|
+
discoverSessions,
|
|
86
|
+
isCDPReachable,
|
|
87
|
+
parseCDPResponse
|
|
88
|
+
};
|
|
89
|
+
//# sourceMappingURL=cdp-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/fetch/cdp-client.ts"],"sourcesContent":["import * as http from 'node:http';\nimport { createLogger } from '../logger.js';\nimport type { CDPSession } from '../types.js';\n\nconst log = createLogger('fetch');\n\nconst DEFAULT_TIMEOUT_MS = 3000;\n\nexport interface DiscoverOptions {\n timeoutMs?: number;\n filterPages?: boolean;\n}\n\nfunction httpGet(url: string, timeoutMs: number): Promise<{ statusCode: number; body: string }> {\n return new Promise((resolve, reject) => {\n const req = http.get(url, { timeout: timeoutMs }, (res) => {\n let body = '';\n res.on('data', (chunk: string) => { body += chunk; });\n res.on('end', () => {\n resolve({ statusCode: res.statusCode ?? 0, body });\n });\n });\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.on('timeout', () => {\n req.destroy();\n reject(new Error(`CDP request timed out after ${timeoutMs}ms`));\n });\n });\n}\n\ninterface RawCDPEntry {\n id?: string;\n url?: string;\n title?: string;\n webSocketDebuggerUrl?: string;\n type?: string;\n devtoolsFrontendUrl?: string;\n description?: string;\n}\n\nexport function parseCDPResponse(raw: string): CDPSession[] {\n try {\n const parsed = JSON.parse(raw);\n if (!Array.isArray(parsed)) {\n log.debug('CDP response is not an array');\n return [];\n }\n\n return parsed\n .filter((entry: RawCDPEntry) => {\n if (!entry || typeof entry !== 'object') return false;\n if (!entry.id || typeof entry.id !== 'string') return false;\n if (!entry.webSocketDebuggerUrl || typeof entry.webSocketDebuggerUrl !== 'string') return false;\n return true;\n })\n .map((entry: RawCDPEntry) => ({\n id: entry.id!,\n url: entry.url ?? '',\n title: entry.title ?? '',\n webSocketDebuggerUrl: entry.webSocketDebuggerUrl!,\n type: entry.type,\n devtoolsFrontendUrl: entry.devtoolsFrontendUrl,\n }));\n } catch (err) {\n log.debug('failed to parse CDP response', { error: err instanceof Error ? err.message : String(err) });\n return [];\n }\n}\n\nexport async function discoverSessions(\n cdpUrl: string,\n options?: DiscoverOptions,\n): Promise<CDPSession[]> {\n const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;\n const filterPages = options?.filterPages ?? false;\n const jsonUrl = cdpUrl.endsWith('/') ? `${cdpUrl}json` : `${cdpUrl}/json`;\n\n try {\n log.debug('discovering CDP sessions', { url: jsonUrl, timeoutMs });\n const { statusCode, body } = await httpGet(jsonUrl, timeoutMs);\n\n if (statusCode !== 200) {\n log.warn('CDP endpoint returned non-200', { statusCode, url: jsonUrl });\n return [];\n }\n\n let sessions = parseCDPResponse(body);\n\n if (filterPages) {\n sessions = sessions.filter(s => s.type === 'page' || s.type === undefined);\n }\n\n log.info('CDP sessions discovered', { count: sessions.length, url: jsonUrl });\n return sessions;\n } catch (err) {\n log.debug('CDP discovery failed', {\n url: jsonUrl,\n error: err instanceof Error ? err.message : String(err),\n });\n return [];\n }\n}\n\nexport async function isCDPReachable(cdpUrl: string, timeoutMs = DEFAULT_TIMEOUT_MS): Promise<boolean> {\n const jsonUrl = cdpUrl.endsWith('/') ? `${cdpUrl}json` : `${cdpUrl}/json`;\n try {\n const { statusCode } = await httpGet(jsonUrl, timeoutMs);\n return statusCode === 200;\n } catch {\n return false;\n }\n}\n"],"mappings":"AAAA,YAAY,UAAU;AACtB,SAAS,oBAAoB;AAG7B,MAAM,MAAM,aAAa,OAAO;AAEhC,MAAM,qBAAqB;AAO3B,SAAS,QAAQ,KAAa,WAAkE;AAC9F,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,MAAM,KAAK,IAAI,KAAK,EAAE,SAAS,UAAU,GAAG,CAAC,QAAQ;AACzD,UAAI,OAAO;AACX,UAAI,GAAG,QAAQ,CAAC,UAAkB;AAAE,gBAAQ;AAAA,MAAO,CAAC;AACpD,UAAI,GAAG,OAAO,MAAM;AAClB,gBAAQ,EAAE,YAAY,IAAI,cAAc,GAAG,KAAK,CAAC;AAAA,MACnD,CAAC;AAAA,IACH,CAAC;AAED,QAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,aAAO,GAAG;AAAA,IACZ,CAAC;AAED,QAAI,GAAG,WAAW,MAAM;AACtB,UAAI,QAAQ;AACZ,aAAO,IAAI,MAAM,+BAA+B,SAAS,IAAI,CAAC;AAAA,IAChE,CAAC;AAAA,EACH,CAAC;AACH;AAYO,SAAS,iBAAiB,KAA2B;AAC1D,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,GAAG;AAC7B,QAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,UAAI,MAAM,8BAA8B;AACxC,aAAO,CAAC;AAAA,IACV;AAEA,WAAO,OACJ,OAAO,CAAC,UAAuB;AAC9B,UAAI,CAAC,SAAS,OAAO,UAAU,SAAU,QAAO;AAChD,UAAI,CAAC,MAAM,MAAM,OAAO,MAAM,OAAO,SAAU,QAAO;AACtD,UAAI,CAAC,MAAM,wBAAwB,OAAO,MAAM,yBAAyB,SAAU,QAAO;AAC1F,aAAO;AAAA,IACT,CAAC,EACA,IAAI,CAAC,WAAwB;AAAA,MAC5B,IAAI,MAAM;AAAA,MACV,KAAK,MAAM,OAAO;AAAA,MAClB,OAAO,MAAM,SAAS;AAAA,MACtB,sBAAsB,MAAM;AAAA,MAC5B,MAAM,MAAM;AAAA,MACZ,qBAAqB,MAAM;AAAA,IAC7B,EAAE;AAAA,EACN,SAAS,KAAK;AACZ,QAAI,MAAM,gCAAgC,EAAE,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,EAAE,CAAC;AACrG,WAAO,CAAC;AAAA,EACV;AACF;AAEA,eAAsB,iBACpB,QACA,SACuB;AACvB,QAAM,YAAY,SAAS,aAAa;AACxC,QAAM,cAAc,SAAS,eAAe;AAC5C,QAAM,UAAU,OAAO,SAAS,GAAG,IAAI,GAAG,MAAM,SAAS,GAAG,MAAM;AAElE,MAAI;AACF,QAAI,MAAM,4BAA4B,EAAE,KAAK,SAAS,UAAU,CAAC;AACjE,UAAM,EAAE,YAAY,KAAK,IAAI,MAAM,QAAQ,SAAS,SAAS;AAE7D,QAAI,eAAe,KAAK;AACtB,UAAI,KAAK,iCAAiC,EAAE,YAAY,KAAK,QAAQ,CAAC;AACtE,aAAO,CAAC;AAAA,IACV;AAEA,QAAI,WAAW,iBAAiB,IAAI;AAEpC,QAAI,aAAa;AACf,iBAAW,SAAS,OAAO,OAAK,EAAE,SAAS,UAAU,EAAE,SAAS,MAAS;AAAA,IAC3E;AAEA,QAAI,KAAK,2BAA2B,EAAE,OAAO,SAAS,QAAQ,KAAK,QAAQ,CAAC;AAC5E,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,QAAI,MAAM,wBAAwB;AAAA,MAChC,KAAK;AAAA,MACL,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD,CAAC;AACD,WAAO,CAAC;AAAA,EACV;AACF;AAEA,eAAsB,eAAe,QAAgB,YAAY,oBAAsC;AACrG,QAAM,UAAU,OAAO,SAAS,GAAG,IAAI,GAAG,MAAM,SAAS,GAAG,MAAM;AAClE,MAAI;AACF,UAAM,EAAE,WAAW,IAAI,MAAM,QAAQ,SAAS,SAAS;AACvD,WAAO,eAAe;AAAA,EACxB,QAAQ;AACN,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
@@ -1,62 +1,55 @@
|
|
|
1
1
|
const VISIBLE_TEXT_THRESHOLD = 200;
|
|
2
2
|
const SCRIPT_RATIO_THRESHOLD = 0.8;
|
|
3
3
|
function stripScriptsAndStyles(html) {
|
|
4
|
-
|
|
5
|
-
.replace(/<script[\s\S]*?<\/script>/gi, '')
|
|
6
|
-
.replace(/<style[\s\S]*?<\/style>/gi, '');
|
|
4
|
+
return html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "");
|
|
7
5
|
}
|
|
8
6
|
function extractVisibleText(html) {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
const stripped = stripScriptsAndStyles(html);
|
|
8
|
+
const noTags = stripped.replace(/<[^>]+>/g, " ");
|
|
9
|
+
return noTags.replace(/\s+/g, " ").trim();
|
|
12
10
|
}
|
|
13
11
|
function hasSpaShellIndicator(html) {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
12
|
+
const spaPatterns = [
|
|
13
|
+
/<div[^>]+id=["']root["'][^>]*>\s*<\/div>/i,
|
|
14
|
+
/<div[^>]+id=["']app["'][^>]*>\s*<\/div>/i,
|
|
15
|
+
/<div[^>]+id=["']__next["'][^>]*>\s*<\/div>/i
|
|
16
|
+
];
|
|
17
|
+
return spaPatterns.some((pattern) => pattern.test(html));
|
|
20
18
|
}
|
|
21
19
|
function hasNextData(html) {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
return visibleText.length < VISIBLE_TEXT_THRESHOLD;
|
|
20
|
+
if (!/__NEXT_DATA__/.test(html)) return false;
|
|
21
|
+
const withoutScripts = stripScriptsAndStyles(html);
|
|
22
|
+
const visibleText = withoutScripts.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
|
|
23
|
+
return visibleText.length < VISIBLE_TEXT_THRESHOLD;
|
|
27
24
|
}
|
|
28
25
|
function hasNoscriptRequired(html) {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
});
|
|
26
|
+
const noscriptMatches = html.match(/<noscript[^>]*>([\s\S]*?)<\/noscript>/gi);
|
|
27
|
+
if (!noscriptMatches) return false;
|
|
28
|
+
return noscriptMatches.some((tag) => {
|
|
29
|
+
const inner = tag.replace(/<[^>]+>/g, "").toLowerCase();
|
|
30
|
+
return inner.includes("javascript") || inner.includes("enable");
|
|
31
|
+
});
|
|
36
32
|
}
|
|
37
33
|
function hasHighScriptRatio(html) {
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
return scriptLen / totalLen > SCRIPT_RATIO_THRESHOLD;
|
|
34
|
+
const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
|
|
35
|
+
const bodyContent = bodyMatch ? bodyMatch[1] : html;
|
|
36
|
+
const scriptMatches = bodyContent.match(/<script[\s\S]*?<\/script>/gi) ?? [];
|
|
37
|
+
const scriptText = scriptMatches.join("");
|
|
38
|
+
const scriptLen = scriptText.length;
|
|
39
|
+
const totalLen = bodyContent.length;
|
|
40
|
+
if (totalLen === 0) return false;
|
|
41
|
+
return scriptLen / totalLen > SCRIPT_RATIO_THRESHOLD;
|
|
47
42
|
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if (hasNoscriptRequired(html))
|
|
57
|
-
return true;
|
|
58
|
-
if (hasHighScriptRatio(html))
|
|
59
|
-
return true;
|
|
60
|
-
return false;
|
|
43
|
+
function contentAppearsEmpty(html) {
|
|
44
|
+
const visibleText = extractVisibleText(html);
|
|
45
|
+
if (visibleText.length < VISIBLE_TEXT_THRESHOLD) return true;
|
|
46
|
+
if (hasSpaShellIndicator(html)) return true;
|
|
47
|
+
if (hasNextData(html)) return true;
|
|
48
|
+
if (hasNoscriptRequired(html)) return true;
|
|
49
|
+
if (hasHighScriptRatio(html)) return true;
|
|
50
|
+
return false;
|
|
61
51
|
}
|
|
52
|
+
export {
|
|
53
|
+
contentAppearsEmpty
|
|
54
|
+
};
|
|
62
55
|
//# sourceMappingURL=content-check.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../../src/fetch/content-check.ts"],"sourcesContent":["const VISIBLE_TEXT_THRESHOLD = 200;\nconst SCRIPT_RATIO_THRESHOLD = 0.8;\n\nfunction stripScriptsAndStyles(html: string): string {\n return html\n .replace(/<script[\\s\\S]*?<\\/script>/gi, '')\n .replace(/<style[\\s\\S]*?<\\/style>/gi, '');\n}\n\nfunction extractVisibleText(html: string): string {\n const stripped = stripScriptsAndStyles(html);\n const noTags = stripped.replace(/<[^>]+>/g, ' ');\n return noTags.replace(/\\s+/g, ' ').trim();\n}\n\nfunction hasSpaShellIndicator(html: string): boolean {\n const spaPatterns = [\n /<div[^>]+id=[\"']root[\"'][^>]*>\\s*<\\/div>/i,\n /<div[^>]+id=[\"']app[\"'][^>]*>\\s*<\\/div>/i,\n /<div[^>]+id=[\"']__next[\"'][^>]*>\\s*<\\/div>/i,\n ];\n return spaPatterns.some((pattern) => pattern.test(html));\n}\n\nfunction hasNextData(html: string): boolean {\n if (!/__NEXT_DATA__/.test(html)) return false;\n const withoutScripts = stripScriptsAndStyles(html);\n const visibleText = withoutScripts.replace(/<[^>]+>/g, ' ').replace(/\\s+/g, ' ').trim();\n return visibleText.length < VISIBLE_TEXT_THRESHOLD;\n}\n\nfunction hasNoscriptRequired(html: string): boolean {\n const noscriptMatches = html.match(/<noscript[^>]*>([\\s\\S]*?)<\\/noscript>/gi);\n if (!noscriptMatches) return false;\n return noscriptMatches.some((tag) => {\n const inner = tag.replace(/<[^>]+>/g, '').toLowerCase();\n return inner.includes('javascript') || inner.includes('enable');\n });\n}\n\nfunction hasHighScriptRatio(html: string): boolean {\n const bodyMatch = html.match(/<body[^>]*>([\\s\\S]*?)<\\/body>/i);\n const bodyContent = bodyMatch ? bodyMatch[1] : html;\n\n const scriptMatches = bodyContent.match(/<script[\\s\\S]*?<\\/script>/gi) ?? [];\n const scriptText = scriptMatches.join('');\n\n const scriptLen = scriptText.length;\n const totalLen = bodyContent.length;\n\n if (totalLen === 0) return false;\n return scriptLen / totalLen > SCRIPT_RATIO_THRESHOLD;\n}\n\nexport function contentAppearsEmpty(html: string): boolean {\n const visibleText = extractVisibleText(html);\n if (visibleText.length < VISIBLE_TEXT_THRESHOLD) return true;\n\n if (hasSpaShellIndicator(html)) return true;\n if (hasNextData(html)) return true;\n if (hasNoscriptRequired(html)) return true;\n if (hasHighScriptRatio(html)) return true;\n\n return false;\n}\n"],"mappings":"AAAA,MAAM,yBAAyB;AAC/B,MAAM,yBAAyB;AAE/B,SAAS,sBAAsB,MAAsB;AACnD,SAAO,KACJ,QAAQ,+BAA+B,EAAE,EACzC,QAAQ,6BAA6B,EAAE;AAC5C;AAEA,SAAS,mBAAmB,MAAsB;AAChD,QAAM,WAAW,sBAAsB,IAAI;AAC3C,QAAM,SAAS,SAAS,QAAQ,YAAY,GAAG;AAC/C,SAAO,OAAO,QAAQ,QAAQ,GAAG,EAAE,KAAK;AAC1C;AAEA,SAAS,qBAAqB,MAAuB;AACnD,QAAM,cAAc;AAAA,IAClB;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,SAAO,YAAY,KAAK,CAAC,YAAY,QAAQ,KAAK,IAAI,CAAC;AACzD;AAEA,SAAS,YAAY,MAAuB;AAC1C,MAAI,CAAC,gBAAgB,KAAK,IAAI,EAAG,QAAO;AACxC,QAAM,iBAAiB,sBAAsB,IAAI;AACjD,QAAM,cAAc,eAAe,QAAQ,YAAY,GAAG,EAAE,QAAQ,QAAQ,GAAG,EAAE,KAAK;AACtF,SAAO,YAAY,SAAS;AAC9B;AAEA,SAAS,oBAAoB,MAAuB;AAClD,QAAM,kBAAkB,KAAK,MAAM,yCAAyC;AAC5E,MAAI,CAAC,gBAAiB,QAAO;AAC7B,SAAO,gBAAgB,KAAK,CAAC,QAAQ;AACnC,UAAM,QAAQ,IAAI,QAAQ,YAAY,EAAE,EAAE,YAAY;AACtD,WAAO,MAAM,SAAS,YAAY,KAAK,MAAM,SAAS,QAAQ;AAAA,EAChE,CAAC;AACH;AAEA,SAAS,mBAAmB,MAAuB;AACjD,QAAM,YAAY,KAAK,MAAM,gCAAgC;AAC7D,QAAM,cAAc,YAAY,UAAU,CAAC,IAAI;AAE/C,QAAM,gBAAgB,YAAY,MAAM,6BAA6B,KAAK,CAAC;AAC3E,QAAM,aAAa,cAAc,KAAK,EAAE;AAExC,QAAM,YAAY,WAAW;AAC7B,QAAM,WAAW,YAAY;AAE7B,MAAI,aAAa,EAAG,QAAO;AAC3B,SAAO,YAAY,WAAW;AAChC;AAEO,SAAS,oBAAoB,MAAuB;AACzD,QAAM,cAAc,mBAAmB,IAAI;AAC3C,MAAI,YAAY,SAAS,uBAAwB,QAAO;AAExD,MAAI,qBAAqB,IAAI,EAAG,QAAO;AACvC,MAAI,YAAY,IAAI,EAAG,QAAO;AAC9B,MAAI,oBAAoB,IAAI,EAAG,QAAO;AACtC,MAAI,mBAAmB,IAAI,EAAG,QAAO;AAErC,SAAO;AACT;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"http-client.d.ts","sourceRoot":"","sources":["../../src/fetch/http-client.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"http-client.d.ts","sourceRoot":"","sources":["../../src/fetch/http-client.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,eAAe,CAAC,EAAE,MAAM,CAAC;KAC1B,CAAC;CACH;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAmCD,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,OAAO,CAAC,eAAe,CAAC,CAyCrG"}
|
|
@@ -1,146 +1,165 @@
|
|
|
1
|
-
import { getConfig } from
|
|
2
|
-
import { createLogger } from
|
|
3
|
-
const RETRYABLE_STATUSES = new Set([429, 502, 503]);
|
|
4
|
-
const RETRYABLE_ERROR_CODES = new Set([
|
|
5
|
-
const REDIRECT_STATUSES = new Set([301, 302, 307, 308]);
|
|
1
|
+
import { getConfig } from "../config.js";
|
|
2
|
+
import { createLogger } from "../logger.js";
|
|
3
|
+
const RETRYABLE_STATUSES = /* @__PURE__ */ new Set([429, 502, 503]);
|
|
4
|
+
const RETRYABLE_ERROR_CODES = /* @__PURE__ */ new Set(["ECONNRESET", "ETIMEDOUT", "ECONNREFUSED"]);
|
|
5
|
+
const REDIRECT_STATUSES = /* @__PURE__ */ new Set([301, 302, 307, 308]);
|
|
6
6
|
const DEFAULT_USER_AGENTS = [
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
|
8
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
|
9
|
+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
|
|
10
10
|
];
|
|
11
11
|
function getRotatingUserAgent(config) {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
return DEFAULT_USER_AGENTS[Math.floor(Math.random() * DEFAULT_USER_AGENTS.length)];
|
|
12
|
+
if (config.userAgent) return config.userAgent;
|
|
13
|
+
return DEFAULT_USER_AGENTS[Math.floor(Math.random() * DEFAULT_USER_AGENTS.length)];
|
|
15
14
|
}
|
|
16
15
|
function isRetryableError(err) {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
return true;
|
|
24
|
-
}
|
|
25
|
-
return false;
|
|
16
|
+
if (err instanceof Error) {
|
|
17
|
+
const code = err.code;
|
|
18
|
+
if (code && RETRYABLE_ERROR_CODES.has(code)) return true;
|
|
19
|
+
if (err.name === "TimeoutError") return true;
|
|
20
|
+
}
|
|
21
|
+
return false;
|
|
26
22
|
}
|
|
27
23
|
function backoffMs(attempt) {
|
|
28
|
-
|
|
24
|
+
return 500 * Math.pow(2, attempt) + Math.random() * 500;
|
|
29
25
|
}
|
|
30
26
|
function sleep(ms) {
|
|
31
|
-
|
|
27
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
32
28
|
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
});
|
|
64
|
-
}
|
|
29
|
+
async function httpFetch(url, options = {}) {
|
|
30
|
+
const config = getConfig();
|
|
31
|
+
const logger = createLogger("fetch");
|
|
32
|
+
const maxRetries = config.fetchMaxRetries;
|
|
33
|
+
const timeoutMs = options.timeoutMs ?? config.fetchTimeoutMs;
|
|
34
|
+
const maxRedirects = config.maxRedirects;
|
|
35
|
+
let lastError;
|
|
36
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
37
|
+
if (attempt > 0) {
|
|
38
|
+
const delay = backoffMs(attempt - 1);
|
|
39
|
+
logger.debug("retrying after backoff", { attempt, delayMs: delay, url });
|
|
40
|
+
await sleep(delay);
|
|
41
|
+
}
|
|
42
|
+
try {
|
|
43
|
+
const result = await fetchWithRedirects(url, options, timeoutMs, maxRedirects, logger);
|
|
44
|
+
return result;
|
|
45
|
+
} catch (err) {
|
|
46
|
+
lastError = err;
|
|
47
|
+
if (err instanceof HttpFetchError && !err.retryable) {
|
|
48
|
+
throw err;
|
|
49
|
+
}
|
|
50
|
+
const retryable = err instanceof HttpFetchError ? err.retryable : isRetryableError(err);
|
|
51
|
+
if (!retryable || attempt >= maxRetries) {
|
|
52
|
+
throw err;
|
|
53
|
+
}
|
|
54
|
+
logger.warn("fetch failed, will retry", {
|
|
55
|
+
attempt,
|
|
56
|
+
url,
|
|
57
|
+
error: err instanceof Error ? err.message : String(err)
|
|
58
|
+
});
|
|
65
59
|
}
|
|
66
|
-
|
|
60
|
+
}
|
|
61
|
+
throw lastError;
|
|
67
62
|
}
|
|
68
63
|
class HttpFetchError extends Error {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
64
|
+
constructor(message, retryable) {
|
|
65
|
+
super(message);
|
|
66
|
+
this.retryable = retryable;
|
|
67
|
+
this.name = "HttpFetchError";
|
|
68
|
+
}
|
|
69
|
+
retryable;
|
|
75
70
|
}
|
|
76
71
|
async function fetchWithRedirects(originalUrl, options, timeoutMs, maxRedirects, logger) {
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
response.headers.
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
72
|
+
const visited = /* @__PURE__ */ new Set();
|
|
73
|
+
let currentUrl = originalUrl;
|
|
74
|
+
let redirectCount = 0;
|
|
75
|
+
while (true) {
|
|
76
|
+
if (visited.has(currentUrl)) {
|
|
77
|
+
throw new HttpFetchError(`Redirect loop detected at ${currentUrl}`, false);
|
|
78
|
+
}
|
|
79
|
+
visited.add(currentUrl);
|
|
80
|
+
logger.debug("fetching", { url: currentUrl, attempt: redirectCount });
|
|
81
|
+
const signal = AbortSignal.timeout(timeoutMs);
|
|
82
|
+
let response;
|
|
83
|
+
try {
|
|
84
|
+
const ua = getRotatingUserAgent(getConfig());
|
|
85
|
+
const mergedHeaders = { "User-Agent": ua, ...options.headers };
|
|
86
|
+
if (options.conditionalHeaders?.ifNoneMatch) {
|
|
87
|
+
mergedHeaders["If-None-Match"] = options.conditionalHeaders.ifNoneMatch;
|
|
88
|
+
}
|
|
89
|
+
if (options.conditionalHeaders?.ifModifiedSince) {
|
|
90
|
+
mergedHeaders["If-Modified-Since"] = options.conditionalHeaders.ifModifiedSince;
|
|
91
|
+
}
|
|
92
|
+
response = await fetch(currentUrl, {
|
|
93
|
+
headers: mergedHeaders,
|
|
94
|
+
redirect: "manual",
|
|
95
|
+
signal
|
|
96
|
+
});
|
|
97
|
+
} catch (err) {
|
|
98
|
+
const isTimeout = err instanceof Error && err.name === "TimeoutError";
|
|
99
|
+
const isConnErr = err instanceof Error && RETRYABLE_ERROR_CODES.has(err.code ?? "");
|
|
100
|
+
const retryable = isTimeout || isConnErr;
|
|
101
|
+
throw Object.assign(err instanceof Error ? err : new Error(String(err)), { retryable });
|
|
102
|
+
}
|
|
103
|
+
if (response.status === 304) {
|
|
104
|
+
const headers2 = {};
|
|
105
|
+
response.headers.forEach((value, key) => {
|
|
106
|
+
headers2[key] = value;
|
|
107
|
+
});
|
|
108
|
+
try {
|
|
109
|
+
await response.arrayBuffer();
|
|
110
|
+
} catch {
|
|
111
|
+
}
|
|
112
|
+
return {
|
|
113
|
+
url: originalUrl,
|
|
114
|
+
finalUrl: currentUrl,
|
|
115
|
+
html: "",
|
|
116
|
+
contentType: response.headers.get("content-type") ?? "",
|
|
117
|
+
statusCode: 304,
|
|
118
|
+
headers: headers2
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
if (REDIRECT_STATUSES.has(response.status)) {
|
|
122
|
+
const location = response.headers.get("location");
|
|
123
|
+
if (!location) {
|
|
124
|
+
throw new HttpFetchError(`Redirect with no location header at ${currentUrl}`, false);
|
|
125
|
+
}
|
|
126
|
+
redirectCount++;
|
|
127
|
+
if (redirectCount > maxRedirects) {
|
|
128
|
+
throw new HttpFetchError(`Too many redirects (>${maxRedirects}) from ${originalUrl}`, false);
|
|
129
|
+
}
|
|
130
|
+
currentUrl = new URL(location, currentUrl).toString();
|
|
131
|
+
continue;
|
|
132
|
+
}
|
|
133
|
+
if (RETRYABLE_STATUSES.has(response.status)) {
|
|
134
|
+
throw new HttpFetchError(`HTTP ${response.status} from ${currentUrl}`, true);
|
|
135
|
+
}
|
|
136
|
+
const contentType = response.headers.get("content-type") ?? "";
|
|
137
|
+
const headers = {};
|
|
138
|
+
response.headers.forEach((value, key) => {
|
|
139
|
+
headers[key] = value;
|
|
140
|
+
});
|
|
141
|
+
const isPdf = contentType.includes("application/pdf");
|
|
142
|
+
let html;
|
|
143
|
+
let rawBuffer;
|
|
144
|
+
if (isPdf) {
|
|
145
|
+
const arrayBuf = await response.arrayBuffer();
|
|
146
|
+
rawBuffer = Buffer.from(arrayBuf);
|
|
147
|
+
html = "";
|
|
148
|
+
} else {
|
|
149
|
+
html = await response.text();
|
|
144
150
|
}
|
|
151
|
+
return {
|
|
152
|
+
url: originalUrl,
|
|
153
|
+
finalUrl: currentUrl,
|
|
154
|
+
html,
|
|
155
|
+
contentType,
|
|
156
|
+
statusCode: response.status,
|
|
157
|
+
headers,
|
|
158
|
+
rawBuffer
|
|
159
|
+
};
|
|
160
|
+
}
|
|
145
161
|
}
|
|
162
|
+
export {
|
|
163
|
+
httpFetch
|
|
164
|
+
};
|
|
146
165
|
//# sourceMappingURL=http-client.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"http-client.js","sourceRoot":"","sources":["../../src/fetch/http-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAiB5C,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACpD,MAAM,qBAAqB,GAAG,IAAI,GAAG,CAAC,CAAC,YAAY,EAAE,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;AACnF,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAExD,MAAM,mBAAmB,GAAG;IAC1B,uHAAuH;IACvH,iHAAiH;IACjH,uGAAuG;CACxG,CAAC;AAEF,SAAS,oBAAoB,CAAC,MAAqC;IACjE,IAAI,MAAM,CAAC,SAAS;QAAE,OAAO,MAAM,CAAC,SAAS,CAAC;IAC9C,OAAO,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC;AACrF,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAY;IACpC,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAI,GAA6B,CAAC,IAAI,CAAC;QACjD,IAAI,IAAI,IAAI,qBAAqB,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QACzD,iEAAiE;QACjE,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc;YAAE,OAAO,IAAI,CAAC;IAC/C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,SAAS,CAAC,OAAe;IAChC,OAAO,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC;AAC1D,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,GAAW,EAAE,UAA4B,EAAE;IACzE,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,MAAM,CAAC,eAAe,CAAC;IAC1C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC,cAAc,CAAC;IAC7D,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;IAEzC,IAAI,SAAkB,CAAC;IAEvB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;QACvD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YAChB,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YACzE,MAAM,KAAK,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC;YACvF,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,CAAC;YAEhB,IAAI,GAAG,YAAY,cAAc,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC;gBACpD,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,MAAM,SAAS,GAAG,GAAG,YAAY,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;YAExF,IAAI,CAAC,SAAS,IAAI,OAAO,IAAI,UAAU,EAAE,CAAC;gBACxC,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,0BAA0B,EAAE;gBACtC,OAAO;gBACP,GAAG;gBACH,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;aACxD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,SAAS,CAAC;AAClB,CAAC;AAED,MAAM,cAAe,SAAQ,KAAK;IACa;IAA7C,YAAY,OAAe,EAAkB,SAAkB;QAC7D,KAAK,CAAC,OAAO,CAAC,CAAC;QAD4B,cAAS,GAAT,SAAS,CAAS;QAE7D,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED,KAAK,UAAU,kBAAkB,CAC/B,WAAmB,EACnB,OAAyB,EACzB,SAAiB,EACjB,YAAoB,EACpB,MAAuC;IAEvC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,IAAI,UAAU,GAAG,WAAW,CAAC;IAC7B,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,OAAO,IAAI,EAAE,CAAC;QACZ,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,cAAc,CAAC,6BAA6B,UAAU,EAAE,EAAE,KAAK,CAAC,CAAC;QAC7E,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAExB,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,EAAE,GAAG,EAAE,UAAU,EAAE,OAAO,EAAE,aAAa,EAAE,CAAC,CAAC;QAEtE,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAE9C,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,oBAAoB,CAAC,SAAS,EAAE,CAAC,CAAC;YAC7C,MAAM,aAAa,GAAG,EAAE,YAAY,EAAE,EAAE,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;YAC/D,QAAQ,GAAG,MAAM,KAAK,CAAC,UAAU,EAAE;gBACjC,OAAO,EAAE,aAAa;gBACtB,QAAQ,EAAE,QAAQ;gBAClB,MAAM;aACP,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,SAAS,GAAG,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,CAAC;YACtE,MAAM,SAAS,GAAG,GAAG,YAAY,KAAK,IAAI,qBAAqB,CAAC,GAAG,CAAE,GAA6B,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;YAC/G,MAAM,SAAS,GAAG,SAAS,IAAI,SAAS,CAAC;YACzC,MAAM,MAAM,CAAC,MAAM,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC;QAC1F,CAAC;QAED,IAAI,iBAAiB,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3C,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YAClD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,MAAM,IAAI,cAAc,CAAC,uCAAuC,UAAU,EAAE,EAAE,KAAK,CAAC,CAAC;YACvF,CAAC;YAED,aAAa,EAAE,CAAC;YAChB,IAAI,aAAa,GAAG,YAAY,EAAE,CAAC;gBACjC,MAAM,IAAI,cAAc,CAAC,wBAAwB,YAAY,UAAU,WAAW,EAAE,EAAE,KAAK,CAAC,CAAC;YAC/F,CAAC;YAED,6BAA6B;YAC7B,UAAU,GAAG,IAAI,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC,QAAQ,EAAE,CAAC;YACtD,SAAS;QACX,CAAC;QAED,IAAI,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,cAAc,CAAC,QAAQ,QAAQ,CAAC,MAAM,SAAS,UAAU,EAAE,EAAE,IAAI,CAAC,CAAC;QAC/E,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,MAAM,OAAO,GAA2B,EAAE,CAAC;QAC3C,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;YACtC,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACvB,CAAC,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,WAAW,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACtD,IAAI,IAAY,CAAC;QACjB,IAAI,SAA6B,CAAC;QAElC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC9C,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAClC,IAAI,GAAG,EAAE,CAAC;QACZ,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;QAED,OAAO;YACL,GAAG,EAAE,WAAW;YAChB,QAAQ,EAAE,UAAU;YACpB,IAAI;YACJ,WAAW;YACX,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,OAAO;YACP,SAAS;SACV,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
1
|
+
{"version":3,"sources":["../../src/fetch/http-client.ts"],"sourcesContent":["import { getConfig } from '../config.js';\nimport { createLogger } from '../logger.js';\n\nexport interface HttpFetchOptions {\n headers?: Record<string, string>;\n timeoutMs?: number;\n conditionalHeaders?: {\n ifNoneMatch?: string;\n ifModifiedSince?: string;\n };\n}\n\nexport interface HttpFetchResult {\n url: string;\n finalUrl: string;\n html: string;\n contentType: string;\n statusCode: number;\n headers: Record<string, string>;\n rawBuffer?: Buffer;\n}\n\nconst RETRYABLE_STATUSES = new Set([429, 502, 503]);\nconst RETRYABLE_ERROR_CODES = new Set(['ECONNRESET', 'ETIMEDOUT', 'ECONNREFUSED']);\nconst REDIRECT_STATUSES = new Set([301, 302, 307, 308]);\n\nconst DEFAULT_USER_AGENTS = [\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',\n 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',\n 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',\n];\n\nfunction getRotatingUserAgent(config: { userAgent?: string | null }): string {\n if (config.userAgent) return config.userAgent;\n return DEFAULT_USER_AGENTS[Math.floor(Math.random() * DEFAULT_USER_AGENTS.length)];\n}\n\nfunction isRetryableError(err: unknown): boolean {\n if (err instanceof Error) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code && RETRYABLE_ERROR_CODES.has(code)) return true;\n // AbortSignal timeout throws DOMException with name TimeoutError\n if (err.name === 'TimeoutError') return true;\n }\n return false;\n}\n\nfunction backoffMs(attempt: number): number {\n return 500 * Math.pow(2, attempt) + Math.random() * 500;\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n}\n\nexport async function httpFetch(url: string, options: HttpFetchOptions = {}): Promise<HttpFetchResult> {\n const config = getConfig();\n const logger = createLogger('fetch');\n const maxRetries = config.fetchMaxRetries;\n const timeoutMs = options.timeoutMs ?? config.fetchTimeoutMs;\n const maxRedirects = config.maxRedirects;\n\n let lastError: unknown;\n\n for (let attempt = 0; attempt <= maxRetries; attempt++) {\n if (attempt > 0) {\n const delay = backoffMs(attempt - 1);\n logger.debug('retrying after backoff', { attempt, delayMs: delay, url });\n await sleep(delay);\n }\n\n try {\n const result = await fetchWithRedirects(url, options, timeoutMs, maxRedirects, logger);\n return result;\n } catch (err) {\n lastError = err;\n\n if (err instanceof HttpFetchError && !err.retryable) {\n throw err;\n }\n\n const retryable = err instanceof HttpFetchError ? err.retryable : isRetryableError(err);\n\n if (!retryable || attempt >= maxRetries) {\n throw err;\n }\n\n logger.warn('fetch failed, will retry', {\n attempt,\n url,\n error: err instanceof Error ? err.message : String(err),\n });\n }\n }\n\n throw lastError;\n}\n\nclass HttpFetchError extends Error {\n constructor(message: string, public readonly retryable: boolean) {\n super(message);\n this.name = 'HttpFetchError';\n }\n}\n\nasync function fetchWithRedirects(\n originalUrl: string,\n options: HttpFetchOptions,\n timeoutMs: number,\n maxRedirects: number,\n logger: ReturnType<typeof createLogger>,\n): Promise<HttpFetchResult> {\n const visited = new Set<string>();\n let currentUrl = originalUrl;\n let redirectCount = 0;\n\n while (true) {\n if (visited.has(currentUrl)) {\n throw new HttpFetchError(`Redirect loop detected at ${currentUrl}`, false);\n }\n visited.add(currentUrl);\n\n logger.debug('fetching', { url: currentUrl, attempt: redirectCount });\n\n const signal = AbortSignal.timeout(timeoutMs);\n\n let response: Response;\n try {\n const ua = getRotatingUserAgent(getConfig());\n const mergedHeaders: Record<string, string> = { 'User-Agent': ua, ...options.headers };\n // Conditional GET: inject If-None-Match / If-Modified-Since so the\n // server can return 304 + no body when the resource hasn't changed.\n // Callers (eg. etag-incremental crawl) wire these from the persisted\n // crawl_etags row for the URL.\n if (options.conditionalHeaders?.ifNoneMatch) {\n mergedHeaders['If-None-Match'] = options.conditionalHeaders.ifNoneMatch;\n }\n if (options.conditionalHeaders?.ifModifiedSince) {\n mergedHeaders['If-Modified-Since'] = options.conditionalHeaders.ifModifiedSince;\n }\n response = await fetch(currentUrl, {\n headers: mergedHeaders,\n redirect: 'manual',\n signal,\n });\n } catch (err) {\n const isTimeout = err instanceof Error && err.name === 'TimeoutError';\n const isConnErr = err instanceof Error && RETRYABLE_ERROR_CODES.has((err as NodeJS.ErrnoException).code ?? '');\n const retryable = isTimeout || isConnErr;\n throw Object.assign(err instanceof Error ? err : new Error(String(err)), { retryable });\n }\n\n if (response.status === 304) {\n const headers: Record<string, string> = {};\n response.headers.forEach((value, key) => {\n headers[key] = value;\n });\n // Drain so the connection can be released; ignore the (empty) body.\n try { await response.arrayBuffer(); } catch { /* */ }\n return {\n url: originalUrl,\n finalUrl: currentUrl,\n html: '',\n contentType: response.headers.get('content-type') ?? '',\n statusCode: 304,\n headers,\n };\n }\n\n if (REDIRECT_STATUSES.has(response.status)) {\n const location = response.headers.get('location');\n if (!location) {\n throw new HttpFetchError(`Redirect with no location header at ${currentUrl}`, false);\n }\n\n redirectCount++;\n if (redirectCount > maxRedirects) {\n throw new HttpFetchError(`Too many redirects (>${maxRedirects}) from ${originalUrl}`, false);\n }\n\n // Resolve relative redirects\n currentUrl = new URL(location, currentUrl).toString();\n continue;\n }\n\n if (RETRYABLE_STATUSES.has(response.status)) {\n throw new HttpFetchError(`HTTP ${response.status} from ${currentUrl}`, true);\n }\n\n const contentType = response.headers.get('content-type') ?? '';\n const headers: Record<string, string> = {};\n response.headers.forEach((value, key) => {\n headers[key] = value;\n });\n\n const isPdf = contentType.includes('application/pdf');\n let html: string;\n let rawBuffer: Buffer | undefined;\n\n if (isPdf) {\n const arrayBuf = await response.arrayBuffer();\n rawBuffer = Buffer.from(arrayBuf);\n html = '';\n } else {\n html = await response.text();\n }\n\n return {\n url: originalUrl,\n finalUrl: currentUrl,\n html,\n contentType,\n statusCode: response.status,\n headers,\n rawBuffer,\n };\n }\n}\n"],"mappings":"AAAA,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAqB7B,MAAM,qBAAqB,oBAAI,IAAI,CAAC,KAAK,KAAK,GAAG,CAAC;AAClD,MAAM,wBAAwB,oBAAI,IAAI,CAAC,cAAc,aAAa,cAAc,CAAC;AACjF,MAAM,oBAAoB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,GAAG,CAAC;AAEtD,MAAM,sBAAsB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AACF;AAEA,SAAS,qBAAqB,QAA+C;AAC3E,MAAI,OAAO,UAAW,QAAO,OAAO;AACpC,SAAO,oBAAoB,KAAK,MAAM,KAAK,OAAO,IAAI,oBAAoB,MAAM,CAAC;AACnF;AAEA,SAAS,iBAAiB,KAAuB;AAC/C,MAAI,eAAe,OAAO;AACxB,UAAM,OAAQ,IAA8B;AAC5C,QAAI,QAAQ,sBAAsB,IAAI,IAAI,EAAG,QAAO;AAEpD,QAAI,IAAI,SAAS,eAAgB,QAAO;AAAA,EAC1C;AACA,SAAO;AACT;AAEA,SAAS,UAAU,SAAyB;AAC1C,SAAO,MAAM,KAAK,IAAI,GAAG,OAAO,IAAI,KAAK,OAAO,IAAI;AACtD;AAEA,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AACzD;AAEA,eAAsB,UAAU,KAAa,UAA4B,CAAC,GAA6B;AACrG,QAAM,SAAS,UAAU;AACzB,QAAM,SAAS,aAAa,OAAO;AACnC,QAAM,aAAa,OAAO;AAC1B,QAAM,YAAY,QAAQ,aAAa,OAAO;AAC9C,QAAM,eAAe,OAAO;AAE5B,MAAI;AAEJ,WAAS,UAAU,GAAG,WAAW,YAAY,WAAW;AACtD,QAAI,UAAU,GAAG;AACf,YAAM,QAAQ,UAAU,UAAU,CAAC;AACnC,aAAO,MAAM,0BAA0B,EAAE,SAAS,SAAS,OAAO,IAAI,CAAC;AACvE,YAAM,MAAM,KAAK;AAAA,IACnB;AAEA,QAAI;AACF,YAAM,SAAS,MAAM,mBAAmB,KAAK,SAAS,WAAW,cAAc,MAAM;AACrF,aAAO;AAAA,IACT,SAAS,KAAK;AACZ,kBAAY;AAEZ,UAAI,eAAe,kBAAkB,CAAC,IAAI,WAAW;AACnD,cAAM;AAAA,MACR;AAEA,YAAM,YAAY,eAAe,iBAAiB,IAAI,YAAY,iBAAiB,GAAG;AAEtF,UAAI,CAAC,aAAa,WAAW,YAAY;AACvC,cAAM;AAAA,MACR;AAEA,aAAO,KAAK,4BAA4B;AAAA,QACtC;AAAA,QACA;AAAA,QACA,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,MACxD,CAAC;AAAA,IACH;AAAA,EACF;AAEA,QAAM;AACR;AAEA,MAAM,uBAAuB,MAAM;AAAA,EACjC,YAAY,SAAiC,WAAoB;AAC/D,UAAM,OAAO;AAD8B;AAE3C,SAAK,OAAO;AAAA,EACd;AAAA,EAH6C;AAI/C;AAEA,eAAe,mBACb,aACA,SACA,WACA,cACA,QAC0B;AAC1B,QAAM,UAAU,oBAAI,IAAY;AAChC,MAAI,aAAa;AACjB,MAAI,gBAAgB;AAEpB,SAAO,MAAM;AACX,QAAI,QAAQ,IAAI,UAAU,GAAG;AAC3B,YAAM,IAAI,eAAe,6BAA6B,UAAU,IAAI,KAAK;AAAA,IAC3E;AACA,YAAQ,IAAI,UAAU;AAEtB,WAAO,MAAM,YAAY,EAAE,KAAK,YAAY,SAAS,cAAc,CAAC;AAEpE,UAAM,SAAS,YAAY,QAAQ,SAAS;AAE5C,QAAI;AACJ,QAAI;AACF,YAAM,KAAK,qBAAqB,UAAU,CAAC;AAC3C,YAAM,gBAAwC,EAAE,cAAc,IAAI,GAAG,QAAQ,QAAQ;AAKrF,UAAI,QAAQ,oBAAoB,aAAa;AAC3C,sBAAc,eAAe,IAAI,QAAQ,mBAAmB;AAAA,MAC9D;AACA,UAAI,QAAQ,oBAAoB,iBAAiB;AAC/C,sBAAc,mBAAmB,IAAI,QAAQ,mBAAmB;AAAA,MAClE;AACA,iBAAW,MAAM,MAAM,YAAY;AAAA,QACjC,SAAS;AAAA,QACT,UAAU;AAAA,QACV;AAAA,MACF,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,YAAM,YAAY,eAAe,SAAS,IAAI,SAAS;AACvD,YAAM,YAAY,eAAe,SAAS,sBAAsB,IAAK,IAA8B,QAAQ,EAAE;AAC7G,YAAM,YAAY,aAAa;AAC/B,YAAM,OAAO,OAAO,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC;AAAA,IACxF;AAEA,QAAI,SAAS,WAAW,KAAK;AAC3B,YAAMA,WAAkC,CAAC;AACzC,eAAS,QAAQ,QAAQ,CAAC,OAAO,QAAQ;AACvC,QAAAA,SAAQ,GAAG,IAAI;AAAA,MACjB,CAAC;AAED,UAAI;AAAE,cAAM,SAAS,YAAY;AAAA,MAAG,QAAQ;AAAA,MAAQ;AACpD,aAAO;AAAA,QACL,KAAK;AAAA,QACL,UAAU;AAAA,QACV,MAAM;AAAA,QACN,aAAa,SAAS,QAAQ,IAAI,cAAc,KAAK;AAAA,QACrD,YAAY;AAAA,QACZ,SAAAA;AAAA,MACF;AAAA,IACF;AAEA,QAAI,kBAAkB,IAAI,SAAS,MAAM,GAAG;AAC1C,YAAM,WAAW,SAAS,QAAQ,IAAI,UAAU;AAChD,UAAI,CAAC,UAAU;AACb,cAAM,IAAI,eAAe,uCAAuC,UAAU,IAAI,KAAK;AAAA,MACrF;AAEA;AACA,UAAI,gBAAgB,cAAc;AAChC,cAAM,IAAI,eAAe,wBAAwB,YAAY,UAAU,WAAW,IAAI,KAAK;AAAA,MAC7F;AAGA,mBAAa,IAAI,IAAI,UAAU,UAAU,EAAE,SAAS;AACpD;AAAA,IACF;AAEA,QAAI,mBAAmB,IAAI,SAAS,MAAM,GAAG;AAC3C,YAAM,IAAI,eAAe,QAAQ,SAAS,MAAM,SAAS,UAAU,IAAI,IAAI;AAAA,IAC7E;AAEA,UAAM,cAAc,SAAS,QAAQ,IAAI,cAAc,KAAK;AAC5D,UAAM,UAAkC,CAAC;AACzC,aAAS,QAAQ,QAAQ,CAAC,OAAO,QAAQ;AACvC,cAAQ,GAAG,IAAI;AAAA,IACjB,CAAC;AAED,UAAM,QAAQ,YAAY,SAAS,iBAAiB;AACpD,QAAI;AACJ,QAAI;AAEJ,QAAI,OAAO;AACT,YAAM,WAAW,MAAM,SAAS,YAAY;AAC5C,kBAAY,OAAO,KAAK,QAAQ;AAChC,aAAO;AAAA,IACT,OAAO;AACL,aAAO,MAAM,SAAS,KAAK;AAAA,IAC7B;AAEA,WAAO;AAAA,MACL,KAAK;AAAA,MACL,UAAU;AAAA,MACV;AAAA,MACA;AAAA,MACA,YAAY,SAAS;AAAA,MACrB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;","names":["headers"]}
|